Martin fileinfo cache 2 #2833

Open
wants to merge 23 commits into master
33 changes: 32 additions & 1 deletion doc/developer_guide/module_manager.rst
@@ -169,18 +169,49 @@ of `ModuleManager`.

.. testcode ::

mod_manager = ModuleManager.get(use_caching=True)
mod_manager = ModuleManager.get(cache_active=True)


Most of the time in PSyIR generation is currently spent building
the fparser parse tree. Caching this tree therefore leads to
significant speed-ups when reading and parsing the source code
of modules.



Default cache file locations
----------------------------


The default cache file is named after the source file, with the
file extension replaced by `.psycache`. E.g., the cache file for
the source file `foo.f90` will be called `foo.psycache`.
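
As an illustration, a minimal sketch of this naming scheme using only
the Python standard library (the helper name is illustrative, not part
of the PR):

    import os

    def default_cache_filepath(source_path: str) -> str:
        # Replace the source file extension with ".psycache",
        # e.g. "path/to/foo.f90" -> "path/to/foo.psycache".
        root, _ = os.path.splitext(source_path)
        return root + ".psycache"

    assert default_cache_filepath("foo.f90") == "foo.psycache"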



(Global) cache file folder
--------------------------

To avoid storing cache files together with source code files,
a path can be provided to the module manager.

.. testcode ::

mod_manager = ModuleManager.get(cache_active=True,
cache_path="/tmp/my_cache_path")

A cache file name will then be created based on the hash sum of each
source code file. The combination of the provided `cache_path` and
the cache file name is then used as the storage location.

A typical storage path would be, e.g., `$HOME/.cache/psyclone`.
Note that the cache path directory must exist.
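
A minimal sketch of how such a shared cache location can be resolved,
assuming the MD5 hash sum of the source code is used as in this pull
request (the helper name is illustrative):

    import hashlib
    import os

    def shared_cache_filepath(cache_path: str, source_code: str) -> str:
        # Name the cache file after the hash sum of the source code so
        # that files from different source directories cannot clash.
        hash_sum = hashlib.md5(source_code.encode()).hexdigest()
        return os.path.join(cache_path, hash_sum + ".psycache")

    # e.g. shared_cache_filepath("/tmp/my_cache_path",
    #                            "module foo\nend module foo\n")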



Caching algorithm
-----------------

The caching algorithm used to obtain the fparser tree or PSyIR is briefly described as follows:

- If fparser tree / PSyIR was read before: RETURN fparser tree or PSyIR
117 changes: 94 additions & 23 deletions src/psyclone/parse/file_info.py
@@ -87,15 +87,28 @@ class FileInfo:
object holds information on. Can also be set to 'None' in case of
providing fparser / PSyIR node in a different way.
:param use_caching: Use caching of intermediate representations
:param cache_path: Path to directory where to put cache files.
If it is provided, the file name for caching will include
the hash sum to avoid conflicting file names.
This allows using, e.g., `~/.cache/psyclone` as a cache
directory for all cached files.
See _get_filepath_cache() for more information.

"""
def __init__(self, filepath: str, use_caching: bool = False):
def __init__(self,
filepath: str,
cache_active: bool = False,
cache_path: str = None
):

# Full path to file
self._filename: str = filepath

# Use cache features
self._use_caching: bool = use_caching
self._cache_active: bool = cache_active

# Cache filepath
self._cache_path = cache_path

# Source code:
self._source_code: str = None
@@ -113,9 +113,8 @@ def __init__(self, filepath: str, use_caching: bool = False):
# Psyir node
self._psyir_node: FileContainer = None

# Single cache file
(path, ext) = os.path.splitext(self._filename)
self._filepath_cache = path + ".psycache"
# Filepath to cache
self._cache_filename = None

# This reference to `_CacheFileInfo` is created when loading
# cached information from a cache file.
@@ -131,6 +143,42 @@ def __init__(self, filepath: str, use_caching: bool = False):
# is requested.
self._cache_data_save: _CacheFileInfo = None

def _get_filepath_cache(self):
"""Return the filepath of the cache.

This also supports having a shared caching directory,
e.g., in `$HOME/.cache/psyclone/`.

This sets up unique cache file names based on the
hashcode. Consequently, this can't be done in the
constructor since the hashcode of the source code
is required first.
"""

assert self._source_code_hash_sum is not None

assert self._cache_active, (
"Cache file path requested, but caching disabled")

if self._cache_filename is not None:
return self._cache_filename

if self._cache_path is None:
# If cache path is not specified, we use the source code path
# E.g.,
# path/to/file.f90 => path/to/file.psycache
(filepath_no_ext, _) = os.path.splitext(self._filename)

self._cache_filename = filepath_no_ext + ".psycache"
return self._cache_filename

# Cache path was specified.
# We assume this path is shared amongst many.
# Therefore, we associate each cache file to a hashsum.
return os.path.join(
self._cache_path, self._source_code_hash_sum[:55] + ".psycache"
)

@property
def basename(self):
'''
@@ -190,21 +238,27 @@ def get_source_code(self, verbose: bool = False) -> str:
f"FileInfo: No such file or directory '{self._filename}'."
) from err

if self._use_caching:
# Only update if caching is used.
# Compute hash sum which will be used to
# check cache of fparser tree
if verbose:
# TODO #11: Use logging for this
print(
f"- Source file '{self._filename}': "
f"Loading source code"
)

if self._cache_active:
# Update the hash sum
self._source_code_hash_sum = hashlib.md5(
self._source_code.encode()
).hexdigest()
self._source_code.encode()).hexdigest()

return self._source_code

def _cache_load(
self,
verbose: bool = False,
indent: str = ""
) -> _CacheFileInfo:
"""Load fparser parse tree from the cache file if possible.

This also checks for matching checksums after loading the data
from the cache.
The checksum is based solely on a hashsum of the source code itself,
@@ -213,7 +267,7 @@ def _cache_load(
:param verbose: Produce some verbose output
"""

if not self._use_caching:
if not self._cache_active:
return

# Load the source code in case it's not yet loaded.
@@ -230,18 +284,29 @@ def _cache_load(
# basically garbage. This will lead either to an Exception from the
# unpickling or a non-matching checksum which is both caught below.
try:
filehandler = open(self._filepath_cache, "rb")
filehandler = open(self._get_filepath_cache(), "rb")
if verbose:
# TODO #11: Use logging for this
print(
f"{indent}- Using cache file "
f"'{self._get_filepath_cache()}'"
)
except FileNotFoundError:
if verbose:
# TODO #11: Use logging for this
print(f" - No cache file '{self._filepath_cache}' found")
print(
f"{indent}- No cache file "
f"'{self._get_filepath_cache()}' found"
)
return None

# Unpack cache file
try:
cache: _CacheFileInfo = pickle.load(filehandler)
except Exception as ex:
print(f" - Error while reading cache file - ignoring: {str(ex)}")
print(f"{indent} - Error while reading cache file -"
f" ignoring: {str(ex)}"
)
return None

# Verify checksums
@@ -270,7 +335,7 @@ def _cache_save(
:param verbose: Produce some verbose output
"""

if not self._use_caching:
if not self._cache_active:
return None

if self._source_code_hash_sum is None:
@@ -328,13 +393,13 @@ def _cache_save(

# We first remove a potentially existing file
try:
os.remove(self._filepath_cache)
os.remove(self._get_filepath_cache())
except FileNotFoundError:
pass

# Then we open it in exclusive mode.
# If it already exists, an exception would be raised.
fd = os.open(self._filepath_cache,
fd = os.open(self._get_filepath_cache(),
os.O_CREAT | os.O_WRONLY | os.O_EXCL)

filehandler = os.fdopen(fd, "wb")
@@ -431,10 +496,16 @@ def get_fparser_tree(

return self._fparser_tree

def get_psyir(self, verbose: bool = False) -> FileContainer:
"""Returns the PSyIR FileContainer of the file.
def get_psyir(
self,
verbose: bool = False,
indent: str = ""
) -> FileContainer:
"""Returns the psyclone FileContainer of the file.

:param verbose: Produce some verbose output
:param indent: String used for indentation of each line
for verbose output.

:returns: PSyIR file container node.

@@ -443,21 +514,21 @@ def get_psyir(self, verbose: bool = False) -> FileContainer:
return self._psyir_node

# Check for cache
self._cache_load(verbose=verbose)
self._cache_load(verbose=verbose, indent=indent)

if self._cache_data_load is not None:
if self._cache_data_load._psyir_node is not None:
# Use cached version
if verbose:
# TODO #11: Use logging for this
print(" - Using cache of PSyIR")
print(f"{indent}- Using cache of PSyIR")

self._psyir_node = self._cache_data_load._psyir_node
return self._psyir_node

if verbose:
# TODO #11: Use logging for this
print(f" - Running psyir for '{self._filename}'")
print(f"{indent}- Running psyir for '{self._filename}'")

# First, we get the fparser tree
fparse_tree = self.get_fparser_tree(
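
Putting the changed `FileInfo` interface together, a usage sketch based
on the signatures in this diff (the file name is illustrative, and the
shared cache directory must already exist):

    from psyclone.parse import FileInfo

    # Cache file stored next to the source file, i.e. "foo.psycache":
    finfo = FileInfo("foo.f90", cache_active=True)
    psyir = finfo.get_psyir(verbose=True)

    # Cache file stored in a shared directory and named after the
    # hash sum of the source code:
    finfo = FileInfo("foo.f90", cache_active=True,
                     cache_path="/tmp/my_cache_path")
    psyir = finfo.get_psyir(verbose=True, indent="  ")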
23 changes: 20 additions & 3 deletions src/psyclone/parse/module_info.py
@@ -48,7 +48,7 @@
from fparser.two.utils import walk

from psyclone.errors import InternalError, PSycloneError, GenerationError
from psyclone.psyir.nodes import Container
from psyclone.psyir.nodes import Container, Routine
from psyclone.psyir.symbols import Symbol
from psyclone.parse import FileInfo, FileInfoFParserError

@@ -87,7 +87,8 @@ class ModuleInfo:
def __init__(
self,
module_name: str,
file_info: FileInfo
file_info: FileInfo,
psyir_container_node: Container = None
):
if not isinstance(module_name, str):
raise TypeError("Expected type 'str' for argument 'module_name'")
@@ -102,7 +103,7 @@ def __init__(
self._file_info: FileInfo = file_info

# The PSyIR representation
self._psyir_container_node: Container = None
self._psyir_container_node: Container = psyir_container_node

# A cache for the module dependencies: this is just a set
# of all modules USEd by this module.
@@ -324,6 +325,22 @@ def get_symbol(self, name: str) -> Union[Symbol, None]:
except KeyError:
return None

def get_routine_by_name(
self, routine_name: str, trigger_exception: bool = True
) -> Routine:
routine_found: Routine = None

for routine in self.get_psyir().walk(Routine):
routine: Routine
if routine.name.lower() == routine_name.lower():
routine_found = routine

if trigger_exception:
if routine_found is None:
raise ModuleInfoError(f"Subroutine '{routine_name}' not found")

return routine_found

def view_tree(self, indent=""):
"""
Show the module information with markdown style in a tree-like
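
A usage sketch of the new `get_routine_by_name` helper, assuming a
`ModuleInfo` instance `mod_info` has already been obtained from the
module manager (the routine names are illustrative):

    # Case-insensitive lookup; raises ModuleInfoError if no routine
    # with this name exists in the module.
    routine = mod_info.get_routine_by_name("my_subroutine")

    # Return None instead of raising when the routine is missing:
    maybe_routine = mod_info.get_routine_by_name(
        "maybe_missing", trigger_exception=False)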