Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cache: configurable backends #16

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ script:

after_success:
- coveralls

branches:
only:
- master
6 changes: 4 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ def app(tmpdir):
return application.TimeGate(config=dict(
HOST='http://localhost',
BASE_URI='http://www.example.com/',
CACHE_USE=True,
CACHE_FILE=tmpdir.mkdir('cache').strpath,
CACHE_BACKEND='werkzeug.contrib.cache:FileSystemCache',
CACHE_OPTIONS={
'cache_dir': tmpdir.mkdir('cache').strpath,
},
))


Expand Down
10 changes: 5 additions & 5 deletions timegate/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def __init__(self, config=None, cache=None):
self.config.update(config or {})
if cache:
self.cache = cache
elif self.config['CACHE_USE']:
else:
self._build_default_cache()

@cached_property
Expand Down Expand Up @@ -143,10 +143,10 @@ def url_map(self):
def _build_default_cache(self):
"""Build default cache object."""
self.cache = Cache(
self.config['CACHE_FILE'],
self.config['CACHE_TOLERANCE'],
self.config['CACHE_EXP'],
self.config['CACHE_MAX_VALUES'],
self.config.get('CACHE_BACKEND',
'werkzeug.contrib.cache.NullCache'),
cache_refresh_time=self.config.get('CACHE_REFRESH_TIME', 86400),
**self.config.get('CACHE_OPTIONS', {})
)

def __repr__(self):
Expand Down
115 changes: 22 additions & 93 deletions timegate/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,65 +14,32 @@

import logging
import os
import sys
from datetime import datetime

from dateutil.relativedelta import relativedelta
from dateutil.tz import tzutc
from werkzeug.contrib.cache import FileSystemCache, md5

from . import utils as timegate_utils
from .errors import CacheError
from werkzeug.utils import import_string


class Cache(object):
"""Base class for TimeGate caches."""

def __init__(self, path, tolerance, expiration, max_values,
run_tests=True, max_file_size=0):
def __init__(self, cache_backend, cache_refresh_time=86400,
max_file_size=0, **kwargs):
"""Constructor method.

:param path: The path of the cache database file.
:param tolerance: The tolerance, in seconds to which a TimeMap is
considered young enough to be used as is.
:param expiration: How long, in seconds, the cache entries are stored
every get will be a CACHE MISS.
:param max_values: The maximum number of TimeMaps stored in cache
before some are deleted
:param run_tests: (Optional) Tests the cache at initialization.
:param cache_backend: Importable string pointing to cache class.
:param max_file_size: (Optional) The maximum size (in Bytes) for a
TimeMap cache value. When max_file_size=0, there is no limit to
a cache value. When max_file_size=X > 0, the cache will not
store TimeMaps that require more than X bytes in memory.
"""
# Parameters Check
if tolerance <= 0 or expiration <= 0 or max_values <= 0:
raise CacheError('Cannot create cache: all parameters must be > 0')

self.tolerance = relativedelta(seconds=tolerance)
self.path = path.rstrip('/')
self.tolerance = relativedelta(seconds=cache_refresh_time)
self.max_file_size = max(max_file_size, 0)
self.CHECK_SIZE = self.max_file_size > 0
self.max_values = max_values
self.backend = FileSystemCache(path,
threshold=self.max_values,
default_timeout=expiration)

# Testing cache
if run_tests:
try:
key = b'1'
val = 1
self.backend.set(key, val)
assert (not self.CHECK_SIZE) or self._check_size(key) > 0
assert self.backend.get(key) == val
os.remove(os.path.join(self.path, md5(key).hexdigest()))
except Exception as e:
raise CacheError('Error testing cache: %s' % e)

logging.debug(
'Cache created. max_files = %d. Expiration = %d. '
'max_file_size = %d' % (
self.max_values, expiration, self.max_file_size))
self.backend = import_string(cache_backend)(**kwargs)

def get_until(self, uri_r, date):
"""Returns the TimeMap (memento,datetime)-list for the requested
Expand All @@ -88,28 +55,11 @@ def get_until(self, uri_r, date):
None otherwise.
"""
# Query the backend for stored cache values to that memento
key = uri_r
try:
val = self.backend.get(key)
except Exception as e:
logging.error('Exception loading cache content: %s' % e)
return None

if val:
# There is a value in the cache
val = self.backend.get(uri_r)
if val: # There is a value in the cache
timestamp, timemap = val
logging.info('Cached value exists for %s' % uri_r)
if date > timestamp + self.tolerance:
logging.info('Cache MISS: value outdated for %s' % uri_r)
timemap = None
else:
logging.info('Cache HIT: found value for %s' % uri_r)
else:
# Cache MISS: No value
logging.info('Cache MISS: No cached value for %s' % uri_r)
timemap = None

return timemap
if date <= timestamp + self.tolerance:
return timemap

def get_all(self, uri_r):
"""Request the whole TimeMap for that uri.
Expand All @@ -130,42 +80,21 @@ def set(self, uri_r, timemap):
:param timemap: The value to cache.
:return: The backend setter method return value.
"""
logging.info('Updating cache for %s' % uri_r)
timestamp = datetime.utcnow().replace(tzinfo=tzutc())
val = (timestamp, timemap)
key = uri_r
try:
self.backend.set(key, val)
if self.CHECK_SIZE:
self._check_size(uri_r)
except Exception as e:
logging.error('Error setting cache value: %s' % e)
if self._check_size(val):
self.backend.set(uri_r, val)

def _check_size(self, key, delete=True):
"""Check the size that a specific TimeMap value is using on disk.
def _check_size(self, val):
"""Check the size a specific TimeMap value would occupy in memory.

It deletes if it is more than the maximum size.

:param key: The TimeMap original resource.
:param delete: (Optional) When true, the value is deleted.
Else only a warning is raised.
:return: The size of the value on disk (0 if it was deleted).
:param val: The cached object.
        :return: True if the value is small enough to be stored, False otherwise.
"""
try:
fname = md5(key).hexdigest() # werkzeug key
fpath = self.path + '/' + fname
size = os.path.getsize(fpath)
if size > self.max_file_size and delete:
message = ('Cache value too big (%dB, max %dB) '
'for the TimeMap of %s')
if delete:
message += '. Deleting cached value.'
os.remove(fpath)
size = 0
logging.warning(message % (size, self.max_file_size, key))
return size
except Exception as e:
logging.error(
'Exception checking cache value size for TimeMap of %s '
'Exception: %s' % (key, e))
return 0
if self.CHECK_SIZE:
size = sys.getsizeof(val)
if size > self.max_file_size:
return False
return True
14 changes: 6 additions & 8 deletions timegate/conf/config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,9 @@ base_uri = http://www.example.com/

[cache]

# cache_activated
# When true, the cache stores TimeMaps from API that allows batch (get_all_mementos) requests, except for requests with `Cache-Control: no-cache` header, which will always return fresh Mementos.
# When false, no cache file will be created
# Default true
cache_activated = false
# cache_backend
# To disable caching, set this to werkzeug.contrib.cache:NullCache
cache_backend = werkzeug.contrib.cache:FileSystemCache

# cache_refresh_time
# Time in seconds during which a cached TimeMap is assumed not to have changed. Any TimeGate request for a datetime past this period (or any TimeMap request past this period) will trigger a refresh of the cached value.
Expand All @@ -55,10 +53,10 @@ cache_refresh_time = 86400
# cache_dir
# Cache directory relative path for data files. Make sure that this directory is empty or else the cache will start deleting random files.
# Default cache/
cache_directory = cache
cache_dir = cache

# cache_max_values
# threshold
# Maximum number of stored TimeMaps in the cache.
# Tweak this depending on how big your TimeMaps can become (number of elements and length of URIs)
# Default 250
cache_max_values = 250
threshold = 250
32 changes: 22 additions & 10 deletions timegate/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,30 @@ def from_inifile(self, filename, silent=True):
self['USE_TIMEMAPS'] = False

# Cache
# When False, all cache requests will be cache MISS
self['CACHE_USE'] = conf.getboolean('cache', 'cache_activated')
self['CACHE_BACKEND'] = conf.get('cache', 'cache_backend')
# Time window in which the cache value is considered young
# enough to be valid
self['CACHE_TOLERANCE'] = conf.getint('cache', 'cache_refresh_time')
# Cache files paths
self['CACHE_DIRECTORY'] = conf.get(
'cache', 'cache_directory').rstrip('/')
# Maximum number of TimeMaps stored in cache
self['CACHE_MAX_VALUES'] = conf.getint('cache', 'cache_max_values')
# Cache files paths
self['CACHE_FILE'] = self['CACHE_DIRECTORY'] # + '/cache_data'
self['CACHE_REFRESH_TIME'] = conf.getint('cache', 'cache_refresh_time')

options = {
'cache_backend': None,
'cache_refresh_time': None,
'default_timeout': 'getint',
'mode': 'getint',
'port': 'getint',
'threshold': 'getint',
}
self.setdefault('CACHE_OPTIONS', {})

for key in conf.options('cache'):
if key in options:
getter = options[key]
if getter:
self['CACHE_OPTIONS'][key] = getattr(conf, getter)(
'cache', key
)
else:
self['CACHE_OPTIONS'][key] = conf.get('cache', key)

def from_object(self, obj):
"""Update config with values from given object.
Expand Down