Skip to content

Commit

Permalink
Fix lru cache copying (getsentry#3883)
Browse files Browse the repository at this point in the history
A simpler and better LRU Cache implementation that prevents data leaking between copied caches.

Fixes getsentry#3852

---------

Co-authored-by: Anton Pirker <[email protected]>
  • Loading branch information
ffelixg and antonpirker authored Dec 20, 2024
1 parent 8ced660 commit f6281f5
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 161 deletions.
195 changes: 35 additions & 160 deletions sentry_sdk/_lru_cache.py
Original file line number Diff line number Diff line change
@@ -1,181 +1,56 @@
"""
A fork of Python 3.6's stdlib lru_cache (found in Python's 'cpython/Lib/functools.py')
adapted into a data structure for single threaded uses.
from typing import TYPE_CHECKING

https://github.com/python/cpython/blob/v3.6.12/Lib/functools.py
if TYPE_CHECKING:
from typing import Any


Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Python Software Foundation;
All Rights Reserved
PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
--------------------------------------------
1. This LICENSE AGREEMENT is between the Python Software Foundation
("PSF"), and the Individual or Organization ("Licensee") accessing and
otherwise using this software ("Python") in source or binary form and
its associated documentation.
2. Subject to the terms and conditions of this License Agreement, PSF hereby
grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
analyze, test, perform and/or display publicly, prepare derivative works,
distribute, and otherwise use Python alone or in any derivative version,
provided, however, that PSF's License Agreement and PSF's notice of copyright,
i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Python Software Foundation;
All Rights Reserved" are retained in Python alone or in any derivative version
prepared by Licensee.
3. In the event Licensee prepares a derivative work that is based on
or incorporates Python or any part thereof, and wants to make
the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to Python.
4. PSF is making Python available to Licensee on an "AS IS"
basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between PSF and
Licensee. This License Agreement does not grant permission to use PSF
trademarks or trade name in a trademark sense to endorse or promote
products or services of Licensee, or any third party.
8. By copying, installing or otherwise using Python, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.
"""

from copy import copy, deepcopy

SENTINEL = object()


# aliases to the entries in a node
PREV = 0
NEXT = 1
KEY = 2
VALUE = 3
_SENTINEL = object()


class LRUCache:
    """Least-recently-used cache backed by one insertion-ordered dict.

    The first key in ``_data`` is the least recently used entry and the last
    key is the most recently used one. Every successful read or write
    re-inserts its key so that it moves to the end of the dict.
    """

    def __init__(self, max_size):
        # type: (int) -> None
        """Create an empty cache that holds at most ``max_size`` entries.

        Raises ``AssertionError`` when ``max_size`` is not positive.
        """
        if max_size <= 0:
            raise AssertionError(f"invalid max_size: {max_size}")
        self.max_size = max_size
        self._data = {}  # type: dict[Any, Any]
        self.hits = self.misses = 0
        self.full = False

    def __copy__(self):
        # type: () -> LRUCache
        """Return a cache with the same entries, order, counters and size.

        The mapping itself is copied, so later ``set``/``get`` calls on one
        cache do not affect the other. Keys and values are not copied.
        """
        new = LRUCache(max_size=self.max_size)
        new.hits = self.hits
        new.misses = self.misses
        new.full = self.full
        new._data = self._data.copy()
        return new

    def set(self, key, value):
        # type: (Any, Any) -> None
        """Store ``value`` under ``key`` and mark it most recently used.

        When the key is new and the cache is full, the least recently used
        entry is evicted first.
        """
        if key in self._data:
            # Drop the existing slot so the insert below lands at the end.
            del self._data[key]
        elif self.full:
            # The first key in iteration order is the least recently used.
            del self._data[next(iter(self._data))]
        self._data[key] = value
        self.full = len(self._data) >= self.max_size

    def get(self, key, default=None):
        # type: (Any, Any) -> Any
        """Return the value for ``key``, or ``default`` when it is absent.

        A hit moves the key to the most recently used position and increments
        ``hits``; a miss increments ``misses`` and leaves the cache unchanged.
        """
        try:
            ret = self._data.pop(key)
        except KeyError:
            self.misses += 1
            ret = default
        else:
            self.hits += 1
            # Re-insert so the key becomes the newest entry.
            self._data[key] = ret

        return ret

    def get_all(self):
        # type: () -> list[tuple[Any, Any]]
        """Return all ``(key, value)`` pairs, least recently used first."""
        return list(self._data.items())
37 changes: 36 additions & 1 deletion tests/test_lru_cache.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from copy import copy
from copy import copy, deepcopy

from sentry_sdk._lru_cache import LRUCache

Expand Down Expand Up @@ -76,3 +76,38 @@ def test_cache_copy():
cache.get(1)
assert copied.get_all() == [(1, 1), (2, 2), (3, 3)]
assert cache.get_all() == [(2, 2), (3, 3), (1, 1)]


def test_cache_deepcopy():
    """A deep copy keeps its own contents and ordering after the source changes."""
    source = LRUCache(3)
    for item in (0, 1):
        source.set(item, item)

    # Writes to the source after the copy must not reach the snapshot.
    snapshot = deepcopy(source)
    for item in (2, 3):
        source.set(item, item)
    assert snapshot.get_all() == [(0, 0), (1, 1)]
    assert source.get_all() == [(1, 1), (2, 2), (3, 3)]

    # A read reorders only the cache it was issued against.
    snapshot = deepcopy(source)
    source.get(1)
    assert snapshot.get_all() == [(1, 1), (2, 2), (3, 3)]
    assert source.get_all() == [(2, 2), (3, 3), (1, 1)]


def test_cache_pollution():
    """Overwriting a key in a shallow copy leaves the source value untouched."""
    source = LRUCache(max_size=2)
    source.set(1, True)

    clone = copy(source)
    clone.set(1, False)

    assert source.get(1) is True
    assert clone.get(1) is False


def test_cache_pollution_deepcopy():
    """Overwriting a key in a deep copy leaves the source value untouched."""
    source = LRUCache(max_size=2)
    source.set(1, True)

    clone = deepcopy(source)
    clone.set(1, False)

    assert source.get(1) is True
    assert clone.get(1) is False
22 changes: 22 additions & 0 deletions tests/test_scope.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,28 @@ def test_all_slots_copied():
assert getattr(scope_copy, attr) == getattr(scope, attr)


def test_scope_flags_copy():
    # A forked scope must carry its own flag buffer: writes made on the
    # fork stay out of the parent, and writes made on the parent after
    # the fork stay out of the fork.
    parent = Scope()
    parent.flags.set("a", True)

    child = parent.fork()
    child.flags.set("a", False)
    parent.flags.set("b", True)
    child.flags.set("c", True)

    expected_parent = [
        {"flag": "a", "result": True},
        {"flag": "b", "result": True},
    ]
    assert parent.flags.get() == expected_parent

    expected_child = [
        {"flag": "a", "result": False},
        {"flag": "c", "result": True},
    ]
    assert child.flags.get() == expected_child


def test_merging(sentry_init, capture_events):
sentry_init()

Expand Down

0 comments on commit f6281f5

Please sign in to comment.