Martin fileinfo cache 2 #2833

Open
wants to merge 23 commits into master
33 changes: 32 additions & 1 deletion doc/developer_guide/module_manager.rst
@@ -169,18 +169,49 @@ of `ModuleManager`.

.. testcode ::

mod_manager = ModuleManager.get(use_caching=True)
mod_manager = ModuleManager.get(cache_active=True)


Most of the time in PSyIR generation is currently spent building
the fparser parse tree. Caching this tree therefore leads to
significant speed-ups when reading and parsing the source code
of modules.



Default cache file locations
----------------------------


The default cache file is named after the source file, with the
file extension replaced by `.psycache`. E.g., the cache file for
the source file `foo.f90` will be called `foo.psycache`.
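
As an illustration, a minimal sketch of this naming scheme using only
the Python standard library (the helper name is illustrative, not part
of the PR):

    import os

    def default_cache_filepath(source_path: str) -> str:
        # Replace the source file extension with ".psycache",
        # e.g. "path/to/foo.f90" -> "path/to/foo.psycache".
        root, _ = os.path.splitext(source_path)
        return root + ".psycache"

    assert default_cache_filepath("foo.f90") == "foo.psycache"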



(Global) cache file folder
--------------------------

To avoid storing cache files together with source code files,
a path can be provided to the module manager.

.. testcode ::

mod_manager = ModuleManager.get(cache_active=True,
cache_path="/tmp/my_cache_path")

A cache file name will then be created based on the hash sum of each
source code file. The combination of the provided `cache_path` and
the cache file name is then used as the storage location.

A typical storage path would be, e.g., `$HOME/.cache/psyclone`.
Note that the cache path directory must exist.
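
A minimal sketch of how such a shared cache location can be resolved,
assuming the MD5 hash sum of the source code is used as in this pull
request (the helper name is illustrative):

    import hashlib
    import os

    def shared_cache_filepath(cache_path: str, source_code: str) -> str:
        # Name the cache file after the hash sum of the source code so
        # that files from different source directories cannot clash.
        hash_sum = hashlib.md5(source_code.encode()).hexdigest()
        return os.path.join(cache_path, hash_sum + ".psycache")

    # e.g. shared_cache_filepath("/tmp/my_cache_path",
    #                            "module foo\nend module foo\n")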



Caching algorithm
-----------------

The caching algorithm used to obtain the fparser tree or PSyIR is briefly described as follows:

- If fparser tree / PSyIR was read before: RETURN fparser tree or PSyIR
117 changes: 94 additions & 23 deletions src/psyclone/parse/file_info.py
@@ -87,15 +87,28 @@ class FileInfo:
object holds information on. Can also be set to 'None' in case of
providing fparser / PSyIR node in a different way.
:param use_caching: Use caching of intermediate representations
:param cache_path: Path to directory where to put cache files.
If it is provided, the file name for caching will include
the hash sum to avoid conflicting file names.
This allows using, e.g., `~/.cache/psyclone` as a cache
directory for all cached files.
See _get_filepath_cache() for more information.

"""
def __init__(self, filepath: str, use_caching: bool = False):
def __init__(self,
filepath: str,
cache_active: bool = False,
cache_path: str = None
):

# Full path to file
self._filename: str = filepath

# Use cache features
self._use_caching: bool = use_caching
self._cache_active: bool = cache_active

# Cache filepath
self._cache_path = cache_path

# Source code:
self._source_code: str = None
@@ -113,9 +113,8 @@ def __init__(self, filepath: str, use_caching: bool = False):
# Psyir node
self._psyir_node: FileContainer = None

# Single cache file
(path, ext) = os.path.splitext(self._filename)
self._filepath_cache = path + ".psycache"
# Filepath to cache
self._cache_filename = None

# This reference to `_CacheFileInfo` is created when loading
# cached information from a cache file.
@@ -131,6 +143,42 @@ def __init__(self, filepath: str, use_caching: bool = False):
# is requested.
self._cache_data_save: _CacheFileInfo = None

def _get_filepath_cache(self):
"""Return the filepath of the cache.

This also supports having a shared caching directory,
e.g., in `$HOME/.cache/psyclone/`.

This sets up unique cache file names based on the
hashcode. Consequently, this can't be done in the
constructor since the hashcode of the source code
is required first.
"""

assert self._source_code_hash_sum is not None

assert self._cache_active, (
"Cache file path requested, but caching disabled")

if self._cache_filename is not None:
return self._cache_filename

if self._cache_path is None:
# If cache path is not specified, we use the source code path
# E.g.,
# path/to/file.f90 => path/to/file.psycache
(filepath_no_ext, _) = os.path.splitext(self._filename)

self._cache_filename = filepath_no_ext + ".psycache"
return self._cache_filename

# Cache path was specified.
# We assume this path is shared amongst many.
# Therefore, we associate each cache file to a hashsum.
return os.path.join(
self._cache_path, self._source_code_hash_sum[:55] + ".psycache"
)

@property
def basename(self):
'''
@@ -190,21 +238,27 @@ def get_source_code(self, verbose: bool = False) -> str:
f"FileInfo: No such file or directory '{self._filename}'."
) from err

if self._use_caching:
# Only update if caching is used.
# Compute hash sum which will be used to
# check cache of fparser tree
if verbose:
# TODO #11: Use logging for this
print(
f"- Source file '{self._filename}': "
f"Loading source code"
)

if self._cache_active:
# Update the hash sum
self._source_code_hash_sum = hashlib.md5(
self._source_code.encode()
).hexdigest()
self._source_code.encode()).hexdigest()

return self._source_code

def _cache_load(
self,
verbose: bool = False,
indent: str = ""
) -> _CacheFileInfo:
"""Load fparser parse tree from the cache file if possible.

This also checks for matching checksums after loading the data
from the cache.
The checksum is based solely on a hashsum of the source code itself,
@@ -213,7 +267,7 @@ def _cache_load(
:param verbose: Produce some verbose output
"""

if not self._use_caching:
if not self._cache_active:
return

# Load the source code in case it's not yet loaded.
@@ -230,18 +284,29 @@ def _cache_load(
# basically garbage. This will lead either to an Exception from the
# unpickling or a non-matching checksum which is both caught below.
try:
filehandler = open(self._filepath_cache, "rb")
filehandler = open(self._get_filepath_cache(), "rb")
if verbose:
# TODO #11: Use logging for this
print(
f"{indent}- Using cache file "
f"'{self._get_filepath_cache()}'"
)
except FileNotFoundError:
if verbose:
# TODO #11: Use logging for this
print(f" - No cache file '{self._filepath_cache}' found")
print(
f"{indent}- No cache file "
f"'{self._get_filepath_cache()}' found"
)
return None

# Unpack cache file
try:
cache: _CacheFileInfo = pickle.load(filehandler)
except Exception as ex:
print(f" - Error while reading cache file - ignoring: {str(ex)}")
print(f"{indent} - Error while reading cache file -"
f" ignoring: {str(ex)}"
)
return None

# Verify checksums
@@ -270,7 +335,7 @@ def _cache_save(
:param verbose: Produce some verbose output
"""

if not self._use_caching:
if not self._cache_active:
return None

if self._source_code_hash_sum is None:
@@ -328,13 +393,13 @@ def _cache_save(

# We first remove a potentially existing file
try:
os.remove(self._filepath_cache)
os.remove(self._get_filepath_cache())
except FileNotFoundError:
pass

# Then we open it in exclusive mode.
# If it already exists, an exception would be raised.
fd = os.open(self._filepath_cache,
fd = os.open(self._get_filepath_cache(),
os.O_CREAT | os.O_WRONLY | os.O_EXCL)

filehandler = os.fdopen(fd, "wb")
@@ -431,10 +496,16 @@ def get_fparser_tree(

return self._fparser_tree

def get_psyir(self, verbose: bool = False) -> FileContainer:
"""Returns the PSyIR FileContainer of the file.
def get_psyir(
self,
verbose: bool = False,
indent: str = ""
) -> FileContainer:
"""Returns the psyclone FileContainer of the file.

:param verbose: Produce some verbose output
:param indent: String used for indentation of each line
for verbose output.

:returns: PSyIR file container node.

@@ -443,21 +514,21 @@ def get_psyir(self, verbose: bool = False) -> FileContainer:
return self._psyir_node

# Check for cache
self._cache_load(verbose=verbose)
self._cache_load(verbose=verbose, indent=indent)

if self._cache_data_load is not None:
if self._cache_data_load._psyir_node is not None:
# Use cached version
if verbose:
# TODO #11: Use logging for this
print(" - Using cache of PSyIR")
print(f"{indent}- Using cache of PSyIR")

self._psyir_node = self._cache_data_load._psyir_node
return self._psyir_node

if verbose:
# TODO #11: Use logging for this
print(f" - Running psyir for '{self._filename}'")
print(f"{indent}- Running psyir for '{self._filename}'")

# First, we get the fparser tree
fparse_tree = self.get_fparser_tree(
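
Putting the changed `FileInfo` interface together, a usage sketch based
on the signatures in this diff (the file name is illustrative, and the
shared cache directory must already exist):

    from psyclone.parse import FileInfo

    # Cache file stored next to the source file, i.e. "foo.psycache":
    finfo = FileInfo("foo.f90", cache_active=True)
    psyir = finfo.get_psyir(verbose=True)

    # Cache file stored in a shared directory and named after the
    # hash sum of the source code:
    finfo = FileInfo("foo.f90", cache_active=True,
                     cache_path="/tmp/my_cache_path")
    psyir = finfo.get_psyir(verbose=True, indent="  ")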
23 changes: 20 additions & 3 deletions src/psyclone/parse/module_info.py
@@ -48,7 +48,7 @@
from fparser.two.utils import walk

from psyclone.errors import InternalError, PSycloneError, GenerationError
from psyclone.psyir.nodes import Container
from psyclone.psyir.nodes import Container, Routine
from psyclone.psyir.symbols import Symbol
from psyclone.parse import FileInfo, FileInfoFParserError

@@ -87,7 +87,8 @@ class ModuleInfo:
def __init__(
self,
module_name: str,
file_info: FileInfo
file_info: FileInfo,
psyir_container_node: Container = None
):
if not isinstance(module_name, str):
raise TypeError("Expected type 'str' for argument 'module_name'")
@@ -102,7 +103,7 @@ def __init__(
self._file_info: FileInfo = file_info

# The PSyIR representation
self._psyir_container_node: Container = None
self._psyir_container_node: Container = psyir_container_node

# A cache for the module dependencies: this is just a set
# of all modules USEd by this module.
@@ -324,6 +325,22 @@ def get_symbol(self, name: str) -> Union[Symbol, None]:
except KeyError:
return None

def get_routine_by_name(
self, routine_name: str, trigger_exception: bool = True
) -> Routine:
routine_found: Routine = None

for routine in self.get_psyir().walk(Routine):
routine: Routine
if routine.name.lower() == routine_name.lower():
routine_found = routine

if trigger_exception:
if routine_found is None:
raise ModuleInfoError(f"Subroutine '{routine_name}' not found")

return routine_found

def view_tree(self, indent=""):
"""
Show the module information with markdown style in a tree-like
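
A usage sketch of the new `get_routine_by_name` helper, assuming a
`ModuleInfo` instance `mod_info` has already been obtained from the
module manager (the routine names are illustrative):

    # Case-insensitive lookup; raises ModuleInfoError if no routine
    # with this name exists in the module.
    routine = mod_info.get_routine_by_name("my_subroutine")

    # Return None instead of raising when the routine is missing:
    maybe_routine = mod_info.get_routine_by_name(
        "maybe_missing", trigger_exception=False)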