Source code for psyclone.psyir.nodes.codeblock

# -----------------------------------------------------------------------------
# BSD 3-Clause License
#
# Copyright (c) 2017-2026, Science and Technology Facilities Council.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# Authors R. W. Ford, A. R. Porter and S. Siso, STFC Daresbury Lab
#         I. Kavcic, Met Office
#         J. Henrichs, Bureau of Meteorology
# -----------------------------------------------------------------------------

''' This module contains the CodeBlock node implementation.'''

from __future__ import annotations
import re
from enum import Enum
from typing import Optional, Any, Union

from psyclone.configuration import Config
from psyclone.core import AccessType, Signature, VariablesAccessMap
from psyclone.errors import InternalError
from psyclone.psyir.nodes.statement import Statement
from psyclone.psyir.nodes.datanode import DataNode
from psyclone.psyir.nodes.node import Node


class CodeBlock(Statement, DataNode):
    '''
    Leaf PSyIR node wrapping generic Fortran code that PSyclone does not
    attempt to manipulate.

    The node keeps a reference to the underlying parse-tree nodes so that
    it can still answer a limited set of questions about the enclosed
    code. Those queries are parser specific, so every frontend has to
    supply its own subclass with the concrete implementation.

    :param parse_tree: the parse-tree node(s) representing the Fortran
        code constituting the code block.
    :param structure: whether this code block is a statement or an
        expression.
    :type structure: :py:class:`psyclone.psyir.nodes.CodeBlock.Structure`
    :param parent: the parent node of this code block in the PSyIR.
    :type parent: :py:class:`psyclone.psyir.nodes.Node`
    :param annotations: tags that provide additional information about
        the node. The node should still be functionally correct when
        ignoring these tags.
    :type annotations: list[str | NoneType]

    '''
    #: Textual description of the node.
    _children_valid_format = "<LeafNode>"
    _text_name = "CodeBlock"
    _colour = "red"
    #: The annotations that are supported by this node.
    #: psy-data-start - this node has replaced a PSyDataNode during the
    #: lowering of the PSyIR to language level.
    valid_annotations = ("psy-data-start", )

    class Structure(Enum):
        '''
        Enumeration describing the kind of code held by a code block,
        which may be required when processing it.

        '''
        #: The Code Block comprises one or more Fortran statements
        #: (which themselves may contain expressions).
        STATEMENT = 1
        #: The Code Block comprises one or more Fortran expressions.
        EXPRESSION = 2

    def __init__(
        self,
        parse_tree: Union[Any, list[Any]],
        structure: CodeBlock.Structure,
        parent: Optional[Node] = None,
        annotations: Optional[list[str]] = None
    ):
        super().__init__(parent=parent, annotations=annotations)
        # Always hold the parser objects in a list owned by this node. A
        # list argument is shallow-copied because the caller's container
        # is often a temporary product of converting the parse tree to
        # the PSyIR; any other object is wrapped in a new list.
        self._parse_tree_nodes = (
            parse_tree[:] if isinstance(parse_tree, list) else [parse_tree])
        # Record whether the block is a statement or an expression.
        self._structure = structure

    @staticmethod
    def create(*args, **kwargs) -> CodeBlock:
        '''
        Build a CodeBlock for the given source code by forwarding all
        arguments to the ``create`` method of the subclass associated
        with the frontend selected in the configuration.

        :returns: a CodeBlock node for the given source code using the
            appropriate CodeBlock subclass.

        :raises InternalError: if a frontend does not have an associated
            CodeBlock subclass.

        '''
        frontend = Config.get().frontend
        if frontend == "fparser2":
            subclass = Fparser2CodeBlock
        elif frontend == "treesitter":
            subclass = TreeSitterCodeBlock
        else:
            raise InternalError(
                f"The '{frontend}' frontend does not have an associated "
                f"CodeBlock subclass")
        return subclass.create(*args, **kwargs)

    def __eq__(self, other: Any) -> bool:
        '''
        Checks whether two nodes are equal. Two CodeBlock nodes are equal
        if the parent-class comparison succeeds, their parse_tree_nodes
        lists compare equal and they have the same structure.

        :param other: the object to check equality to.

        :returns: whether other is equal to self.

        '''
        # The chain short-circuits, so the attributes of `other` are only
        # read once the parent-class comparison has succeeded.
        return (super().__eq__(other)
                and self.parse_tree_nodes == other.parse_tree_nodes
                and self.structure == other.structure)

    @property
    def structure(self) -> CodeBlock.Structure:
        '''
        :returns: whether this code block is a statement or an expression.

        '''
        return self._structure

    @property
    def parse_tree_nodes(self) -> list[Any]:
        '''
        :returns: the nodes associated with this code block in the
            original parse tree.

        '''
        return self._parse_tree_nodes

    def node_str(self, colour: bool = True) -> str:
        '''
        Create a text description of this node in the schedule,
        optionally including control codes for colour. The description
        lists the type of each stored parse-tree node.

        :param colour: whether or not to include control codes for colour.

        :return: text description of this node.

        '''
        name = self.coloured_name(colour)
        node_types = [type(node) for node in self._parse_tree_nodes]
        return f"{name}[{node_types}]"

    def reference_accesses(self) -> VariablesAccessMap:
        '''
        Get the symbol access map. For a CodeBlock only the names of the
        accessed symbols are known, not how they are accessed, so every
        name is conservatively recorded as READWRITE. Unfortunately, this
        includes the names of any routines that are called.

        TODO #2863 - it would be better to use AccessType.UNKNOWN here but
        currently VariablesAccessMap does not consider that type of
        access.

        This method makes use of
        :py:meth:`~psyclone.psyir.nodes.CodeBlock.get_symbol_names` and is
        therefore subject to the same limitations as that method.

        :returns: a map of all the symbol accessed inside this node, the
            keys are Signatures (unique identifiers to a symbol and its
            structure accessors) and the values are AccessSequence
            (a sequence of AccessTypes).

        '''
        access_map = VariablesAccessMap()
        for symbol_name in self.get_symbol_names():
            access_map.add_access(
                Signature(symbol_name), AccessType.READWRITE, self)
        return access_map

    def __str__(self) -> str:
        num_nodes = len(self._parse_tree_nodes)
        return f"CodeBlock[{num_nodes} nodes]"

    def get_symbol_names(self) -> list[str]:
        '''
        Base-class version: only an empty CodeBlock can be handled here,
        anything else needs a frontend-specific subclass.

        :returns: the name of all symbols accessed in the CodeBlock.

        :raises NotImplementedError: if this CodeBlock holds any
            parse-tree nodes.

        '''
        if self._parse_tree_nodes:
            raise NotImplementedError("Use appropriate CodeBlock subclass")
        return []

    def has_potential_control_flow_jump(self) -> bool:
        '''
        Base-class version: only an empty CodeBlock can be handled here,
        anything else needs a frontend-specific subclass.

        :returns: whether the Codeblock might have control flow jumps.

        :raises NotImplementedError: if this CodeBlock holds any
            parse-tree nodes.

        '''
        if self._parse_tree_nodes:
            raise NotImplementedError("Use appropriate CodeBlock subclass")
        return False

    def get_fortran_lines(self) -> list[str]:
        '''
        Base-class version: only an empty CodeBlock can be handled here,
        anything else needs a frontend-specific subclass.

        :returns: a list of each line of fortran represented by this node.

        :raises NotImplementedError: if this CodeBlock holds any
            parse-tree nodes.

        '''
        if self._parse_tree_nodes:
            raise NotImplementedError("Use appropriate CodeBlock subclass")
        return []
class Fparser2CodeBlock(CodeBlock):
    ''' The fparser2 implementation of CodeBlock. '''

    @staticmethod
    def create(
        source_code: str, partial_code: str, **kwargs
    ) -> Fparser2CodeBlock:
        '''
        Parse the supplied source with fparser2 and wrap the resulting
        parse tree in a new Fparser2CodeBlock.

        :param source_code: the given source
        :param partial_code: keyword to assist the parser with the
            starting node. The value "expression" yields an EXPRESSION
            code block; any other value yields a STATEMENT one.
        :param kwargs: additional arguments to provide to the constructor.

        :returns: a CodeBlock node for the given source code using the
            Fparser2CodeBlock subclass.

        '''
        structure = (CodeBlock.Structure.EXPRESSION
                     if partial_code == "expression"
                     else CodeBlock.Structure.STATEMENT)

        # Imported here on purpose so that the module is only loaded
        # when it is actually needed.
        # pylint: disable=import-outside-toplevel
        from psyclone.psyir.frontend.fparser2 import Fparser2Reader
        tree = Fparser2Reader().generate_parse_tree_from_source(
            source_code, partial_code)
        return Fparser2CodeBlock(tree, structure, **kwargs)

    def get_symbol_names(self) -> list[str]:
        '''
        Analyses the fparser2 parse tree associated with this CodeBlock
        and returns the names of all symbols accessed within it. No
        attempt is made to work out how the symbols are accessed (this
        includes the names of any routines that might be called).

        Note that the names of any Fortran intrinsics are *not* included
        in the result. If the original code has unwisely overridden a
        Fortran intrinsic then fparser *may* incorrectly identify the use
        of such a variable/routine as still being an intrinsic call and,
        as such, it will be omitted from the names returned by this
        method.

        TODO #2863 - these limitations (blanket use of READWRITE by the
        caller and the ignoring of Fortran intrinsics) need to be
        re-visited.

        :returns: the symbol names used inside the CodeBlock.

        '''
        # Imported here on purpose so that the modules are only loaded
        # when they are actually needed.
        # pylint: disable=import-outside-toplevel
        from fparser.two import Fortran2003, pattern_tools
        from fparser.two.utils import walk

        # A Name whose parent is one of these statements is a label
        # associated with loop or branch control, not a symbol.
        label_only_parents = (Fortran2003.Cycle_Stmt,
                              Fortran2003.End_Do_Stmt,
                              Fortran2003.Exit_Stmt,
                              Fortran2003.Else_Stmt,
                              Fortran2003.End_If_Stmt)
        # Literal types that can carry a kind (precision) name.
        kinded_literals = (Fortran2003.Int_Literal_Constant,
                           Fortran2003.Real_Literal_Constant,
                           Fortran2003.Logical_Literal_Constant,
                           Fortran2003.Char_Literal_Constant)

        tree_nodes = self.parse_tree_nodes
        names = []

        for name_node in walk(tree_nodes, Fortran2003.Name):
            owner = name_node.parent
            if isinstance(owner, Fortran2003.Else_If_Stmt):
                # Keep a Symbol used in the conditional part but not a
                # label (which would be the second child in the parse
                # tree). `owner.children.index(name_node)` cannot be used
                # because of fparser #174.
                siblings = owner.children
                if len(siblings) == 1 or name_node is siblings[0]:
                    names.append(name_node.string)
                continue
            if isinstance(owner, label_only_parents):
                continue
            names.append(name_node.string)

        # The precision of a literal is stored in the tree as a plain
        # str (fparser/#456), so the Name walk above does not find it.
        names.extend(literal.items[1]
                     for literal in walk(tree_nodes, kinded_literals)
                     if literal.items[1])

        # A complex literal has a real and an imaginary part and each of
        # these can have its own kind.
        names.extend(
            part.items[1]
            for literal in walk(tree_nodes,
                                Fortran2003.Complex_Literal_Constant)
            for part in literal.items
            if part.items[1])

        # For directives, examine every name-like token of the comment
        # string (from the first "$" onwards) and keep those that match
        # a symbol in the symbol table.
        for directive in walk(tree_nodes, Fortran2003.Directive):
            text = directive.tostr()
            text = text[text.index("$"):]
            name_pattern = pattern_tools.name.get_compiled()
            candidates = re.findall(name_pattern, text)
            table = self.scope.symbol_table
            for candidate in candidates:
                symbol = table.lookup(candidate, otherwise=None)
                if symbol:
                    names.append(symbol.name)

        return names

    def has_potential_control_flow_jump(self) -> bool:
        '''
        :returns: whether this CodeBlock contains a potential control flow
            jump, e.g. GOTO, EXIT, CYCLE or a labelled statement.

        '''
        # Imported here on purpose so that the modules are only loaded
        # when they are actually needed.
        # pylint: disable=import-outside-toplevel
        from fparser.two import Fortran2003
        from fparser.two.utils import walk

        jump_types = (Fortran2003.Goto_Stmt,
                      Fortran2003.Exit_Stmt,
                      Fortran2003.Cycle_Stmt)
        searched_types = jump_types + (Fortran2003.StmtBase, )

        for root in self._parse_tree_nodes:
            for stmt in walk(root, searched_types):
                if isinstance(stmt, jump_types):
                    return True
                # Labelled statements cannot be supported either.
                if (isinstance(stmt, Fortran2003.StmtBase)
                        and stmt.item and stmt.item.label):
                    return True
        return False

    def get_fortran_lines(self) -> list[str]:
        '''
        :returns: a list of each line of fortran represented by this node.

        '''
        return [line
                for node in self._parse_tree_nodes
                for line in node.tofortran().split("\n")]
class TreeSitterCodeBlock(CodeBlock):
    ''' The treesitter implementation of CodeBlock. '''

    @staticmethod
    def create(
        source_code: str, partial_code: str = "", **kwargs
    ) -> TreeSitterCodeBlock:
        '''
        Parse the supplied source with the tree-sitter Fortran reader and
        wrap the resulting parse tree in a new TreeSitterCodeBlock.

        :param source_code: the given source
        :param partial_code: keyword to assist the parser with the
            starting node. Here it is only used to choose the structure:
            "expression" yields an EXPRESSION code block, any other value
            a STATEMENT one.
        :param kwargs: additional arguments to provide to the constructor.

        :returns: a CodeBlock node for the given source code using the
            TreeSitterCodeBlock subclass.

        '''
        structure = (CodeBlock.Structure.EXPRESSION
                     if partial_code == "expression"
                     else CodeBlock.Structure.STATEMENT)

        # Imported here on purpose so that the module is only loaded
        # when it is actually needed.
        # pylint: disable=import-outside-toplevel
        from psyclone.psyir.frontend.fortran_treesitter_reader import (
            FortranTreeSitterReader)
        tree = FortranTreeSitterReader().generate_parse_tree_from_source(
            source_code)
        return TreeSitterCodeBlock(tree, structure, **kwargs)

    def get_fortran_lines(self) -> list[str]:
        '''
        :returns: a list of each line of fortran represented by this node.

        '''
        # The text of each stored node is decoded as UTF-8 and split on
        # newlines.
        return [line
                for node in self._parse_tree_nodes
                for line in str(node.text, encoding="utf8").split("\n")]