Skip to content

Commit

Permalink
major ruff changes
Browse files Browse the repository at this point in the history
  • Loading branch information
barrust committed Dec 27, 2024
1 parent ce0c481 commit 869c627
Show file tree
Hide file tree
Showing 18 changed files with 153 additions and 145 deletions.
36 changes: 17 additions & 19 deletions probables/blooms/bloom.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
""" BloomFilter and BloomFilter on Disk, python implementation
License: MIT
Author: Tyler Barrus ([email protected])
URL: https://github.com/barrust/bloom
"""BloomFilter and BloomFilter on Disk, python implementation
License: MIT
Author: Tyler Barrus ([email protected])
URL: https://github.com/barrust/bloom
"""

import math
import os
from array import array
from binascii import hexlify, unhexlify
from io import BytesIO, IOBase
from collections.abc import ByteString
from io import BufferedRandom, BytesIO, IOBase
from mmap import mmap
from numbers import Number
from pathlib import Path
from shutil import copyfile
from struct import Struct
from textwrap import wrap
from typing import ByteString, Tuple, Union
from typing import Union

from probables.exceptions import InitializationError, NotSupportedError
from probables.hashes import HashFuncT, HashResultsT, KeyT, default_fnv_1a
Expand Down Expand Up @@ -307,10 +309,7 @@ def export_c_header(self, filename: Union[str, Path]) -> None:
Args:
filename (str): The filename to which the Bloom Filter will be written."""
data = (" " + line for line in wrap(", ".join(f"0x{e:02x}" for e in bytearray.fromhex(self.export_hex())), 80))
if self._type in ["regular", "regular-on-disk"]:
bloom_type = "standard BloomFilter"
else:
bloom_type = "CountingBloomFilter"
bloom_type = "standard BloomFilter" if self._type in {"regular", "regular-on-disk"} else "CountingBloomFilter"

with open(filename, "w", encoding="utf-8") as file:
print(f"/* BloomFilter Export of a {bloom_type} */", file=file)
Expand Down Expand Up @@ -465,7 +464,7 @@ def jaccard_index(self, second: SimpleBloomT) -> Union[float, None]:

# More private functions
@classmethod
def _get_optimized_params(cls, estimated_elements: int, false_positive_rate: float) -> Tuple[float, int, int]:
def _get_optimized_params(cls, estimated_elements: int, false_positive_rate: float) -> tuple[float, int, int]:
valid_prms = isinstance(estimated_elements, Number) and estimated_elements > 0
if not valid_prms:
msg = "Bloom: estimated elements must be greater than 0"
Expand Down Expand Up @@ -528,15 +527,16 @@ def _load(
else:
offset = self._FOOTER_STRUCT.size
est_els, els_added, fpr, n_hashes, n_bits = self._parse_footer(
self._FOOTER_STRUCT, file[-1 * offset :] # type: ignore
self._FOOTER_STRUCT,
file[-1 * offset :], # type: ignore
)
self._set_values(est_els, fpr, n_hashes, n_bits, hash_function)
# now read in the bit array!
self._parse_bloom_array(file, self._IMPT_STRUCT.size * self.bloom_length) # type: ignore
self._els_added = els_added

@classmethod
def _parse_footer(cls, stct: Struct, d: ByteString) -> Tuple[int, int, float, int, int]:
def _parse_footer(cls, stct: Struct, d: ByteString) -> tuple[int, int, float, int, int]:
"""parse footer returning the data: estimated elements, elements added,
false positive rate, number hashes, number bits"""
e_elms, e_added, fpr = stct.unpack_from(bytearray(d))
Expand Down Expand Up @@ -568,9 +568,7 @@ def _verify_bloom_similarity(self, second: SimpleBloomT) -> bool:
hash_match = self.number_hashes != second.number_hashes
same_bits = self.number_bits != second.number_bits
next_hash = self.hashes("test") != second.hashes("test")
if hash_match or same_bits or next_hash:
return False
return True
return not (hash_match or same_bits or next_hash)


class BloomFilterOnDisk(BloomFilter):
Expand Down Expand Up @@ -607,7 +605,7 @@ def __init__(
) -> None:
# set some things up
self._filepath = resolve_path(filepath)
self.__file_pointer = None
self.__file_pointer: BufferedRandom | None = None
super().__init__(est_elements, false_positive_rate, filepath, hex_string, hash_function)

def _load_init(self, filepath, hash_function, hex_string, est_elements, false_positive_rate):
Expand Down Expand Up @@ -642,7 +640,7 @@ def close(self) -> None:
"""Clean up the BloomFilterOnDisk object"""
if self.__file_pointer is not None and not self.__file_pointer.closed:
self.__update()
self._bloom.close()
self._bloom.close() # type: ignore
self.__file_pointer.close()
self.__file_pointer = None

Expand Down Expand Up @@ -671,7 +669,7 @@ def _load(self, file: Union[str, Path], hash_function: Union[HashFuncT, None] =
fpr, n_hashes, n_bits = self._get_optimized_params(est_els, fpr)
self._set_values(est_els, fpr, n_hashes, n_bits, hash_function)
# setup a few additional items
self.__file_pointer = open(file, "r+b") # type: ignore
self.__file_pointer = open(file, "r+b") # noqa: SIM115
self._bloom = mmap(self.__file_pointer.fileno(), 0) # type: ignore
self._on_disk = True

Expand Down
3 changes: 2 additions & 1 deletion probables/blooms/countingbloom.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
URL: https://github.com/barrust/counting_bloom
"""
from array import array
from collections.abc import ByteString
from pathlib import Path
from struct import Struct
from typing import ByteString, Union
from typing import Union

from probables.blooms.bloom import BloomFilter
from probables.constants import UINT32_T_MAX, UINT64_T_MAX
Expand Down
14 changes: 8 additions & 6 deletions probables/blooms/expandingbloom.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
""" Expanding and Rotating BloomFilter, python implementations
License: MIT
Author: Tyler Barrus ([email protected])
URL: https://github.com/barrust/pyprobables
"""Expanding and Rotating BloomFilter, python implementations
License: MIT
Author: Tyler Barrus ([email protected])
URL: https://github.com/barrust/pyprobables
"""

from array import array
from collections.abc import ByteString
from io import BytesIO, IOBase
from mmap import mmap
from pathlib import Path
from struct import Struct
from typing import ByteString, Tuple, Union
from typing import Union

from probables.blooms.bloom import BloomFilter
from probables.exceptions import RotatingBloomFilterError
Expand Down Expand Up @@ -144,6 +145,7 @@ def check_alt(self, hashes: HashResultsT) -> bool:
hashes (list): The hash representation to check for in the Bloom Filter
Returns:
bool: `True` if the element is likely present; `False` if definitely not present"""
# TODO: could be simplified to: return any(blm.check_alt(hashes) for blm in self._blooms)
for blm in self._blooms:
if blm.check_alt(hashes):
return True
Expand Down Expand Up @@ -224,7 +226,7 @@ def __load(self, file: Union[Path, str, IOBase, mmap]):
self._parse_blooms(file, size) # type:ignore

@classmethod
def _parse_footer(cls, b: ByteString) -> Tuple[int, int, int, float]:
def _parse_footer(cls, b: ByteString) -> tuple[int, int, int, float]:
offset = cls.__FOOTER_STRUCT.size
size, est_els, els_added, fpr = cls.__FOOTER_STRUCT.unpack(bytes(b[-1 * offset :]))
return int(size), int(est_els), int(els_added), float(fpr)
Expand Down
17 changes: 9 additions & 8 deletions probables/countminsketch/countminsketch.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
""" Count-Min Sketch, Heavy Hitters, and Stream Threshold, python implementations
License: MIT
Author: Tyler Barrus ([email protected])
URL: https://github.com/barrust/count-min-sketch
"""Count-Min Sketch, Heavy Hitters, and Stream Threshold, python implementations
License: MIT
Author: Tyler Barrus ([email protected])
URL: https://github.com/barrust/count-min-sketch
"""

import math
from array import array
from collections.abc import ByteString
from io import BytesIO, IOBase
from mmap import mmap
from numbers import Number
from pathlib import Path
from struct import Struct
from typing import ByteString, Dict, Tuple, Union
from typing import Union

from probables.constants import INT32_T_MAX, INT32_T_MIN, INT64_T_MAX, INT64_T_MIN
from probables.exceptions import CountMinSketchError, InitializationError, NotSupportedError
Expand Down Expand Up @@ -408,7 +409,7 @@ def __load(self, file: Union[Path, str, IOBase, mmap]):
self._parse_bytes(file) # type: ignore

@classmethod
def _parse_footer(cls, file: ByteString) -> Tuple[int, int, int]:
def _parse_footer(cls, file: ByteString) -> tuple[int, int, int]:
"""return width, depth and elements added, in that order"""
offset = cls.__FOOTER_STRUCT.size
width, depth, elements_added = cls.__FOOTER_STRUCT.unpack_from(bytes(file[-1 * offset :]))
Expand Down Expand Up @@ -599,7 +600,7 @@ def __str__(self) -> str:
)

@property
def heavy_hitters(self) -> Dict[str, int]:
def heavy_hitters(self) -> dict[str, int]:
"""dict: Return the heavy hitters, or most common elements
Note:
Expand Down Expand Up @@ -758,7 +759,7 @@ def __str__(self) -> str:
)

@property
def meets_threshold(self) -> Dict[str, int]:
def meets_threshold(self) -> dict[str, int]:
"""dict: Those keys that meet the required threshold (with value)"""
return self.__meets_threshold

Expand Down
15 changes: 7 additions & 8 deletions probables/cuckoo/countingcuckoo.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
""" Counting Cuckoo Filter, python implementation
License: MIT
Author: Tyler Barrus ([email protected])
"""Counting Cuckoo Filter, python implementation
License: MIT
Author: Tyler Barrus ([email protected])
"""

import random
from array import array
from collections.abc import ByteString
from io import IOBase
from mmap import mmap
from pathlib import Path
from struct import Struct
from typing import ByteString, List, Union
from typing import Union

from probables.cuckoo.cuckoo import CuckooFilter
from probables.exceptions import CuckooFilterFullError
Expand Down Expand Up @@ -135,17 +136,15 @@ def frombytes(

def __contains__(self, val: KeyT) -> bool:
"""setup the `in` keyword"""
if self.check(val) > 0:
return True
return False
return self.check(val) > 0

@property
def unique_elements(self) -> int:
"""int: unique number of elements inserted"""
return self.__unique_elements

@property
def buckets(self) -> List[List["CountingCuckooBin"]]: # type: ignore
def buckets(self) -> list[list["CountingCuckooBin"]]: # type: ignore
"""list(list): The buckets holding the fingerprints
Note:
Expand Down
17 changes: 8 additions & 9 deletions probables/cuckoo/cuckoo.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
""" Cuckoo Filter, python implementation
License: MIT
Author: Tyler Barrus ([email protected])
"""Cuckoo Filter, python implementation
License: MIT
Author: Tyler Barrus ([email protected])
"""

import math
import random
from array import array
from collections.abc import ByteString
from io import BytesIO, IOBase
from mmap import mmap
from numbers import Number
from pathlib import Path
from struct import Struct
from typing import ByteString, List, Tuple, Union
from typing import Union

from probables.exceptions import CuckooFilterFullError, InitializationError
from probables.hashes import KeyT, SimpleHashT, fnv_1a
Expand Down Expand Up @@ -226,7 +227,7 @@ def bucket_size(self) -> int:
return self._bucket_size

@property
def buckets(self) -> List[List[int]]:
def buckets(self) -> list[list[int]]:
"""list(list): The buckets holding the fingerprints
Note:
Expand Down Expand Up @@ -312,9 +313,7 @@ def check(self, key: KeyT) -> bool:
bool: True if likely present, False if definitely not"""
idx_1, idx_2, fingerprint = self._generate_fingerprint_info(key)
is_present = self._check_if_present(idx_1, idx_2, fingerprint)
if is_present is not None:
return True
return False
return is_present is not None

def remove(self, key: KeyT) -> bool:
"""Remove an element from the filter
Expand Down Expand Up @@ -491,7 +490,7 @@ def _indicies_from_fingerprint(self, fingerprint):
idx_2 = self.__hash_func(str(fingerprint)) % self.capacity
return idx_1, idx_2

def _generate_fingerprint_info(self, key: KeyT) -> Tuple[int, int, int]:
def _generate_fingerprint_info(self, key: KeyT) -> tuple[int, int, int]:
"""Generate the fingerprint and indicies using the provided key
Args:
Expand Down
14 changes: 7 additions & 7 deletions probables/hashes.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
""" Probables Hashing Utilities """
"""Probables Hashing Utilities"""

from functools import wraps
from hashlib import md5, sha256
from struct import unpack
from typing import Callable, List, Union
from typing import Callable, Union

from probables.constants import UINT32_T_MAX, UINT64_T_MAX

KeyT = Union[str, bytes]
SimpleHashT = Callable[[KeyT, int], int]
HashResultsT = List[int]
HashResultsT = list[int]
HashFuncT = Callable[[KeyT, int], HashResultsT]
HashFuncBytesT = Callable[[KeyT, int], bytes]

Expand Down Expand Up @@ -67,14 +67,14 @@ def hashing_func(key, depth=1):
return hashing_func


def default_fnv_1a(key: KeyT, depth: int = 1) -> List[int]:
def default_fnv_1a(key: KeyT, depth: int = 1) -> list[int]:
"""The default fnv-1a hashing routine
Args:
key (str): The element to be hashed
depth (int): The number of hash permutations to compute
Returns:
list(int): List of size depth hashes"""
list(int): list of size depth hashes"""

res = []
for idx in range(depth):
Expand Down Expand Up @@ -129,7 +129,7 @@ def default_md5(key: KeyT, *args, **kwargs) -> bytes:
key (str): The element to be hashed
depth (int): The number of hash permutations to compute
Returns:
list(int): List of 64-bit hashed representation of key hashes
list(int): list of 64-bit hashed representation of key hashes
Note:
Returns the upper-most 64 bits"""
return md5(key).digest() # type: ignore
Expand All @@ -143,7 +143,7 @@ def default_sha256(key: KeyT, *args, **kwargs) -> bytes:
key (str): The element to be hashed
depth (int): The number of hash permutations to compute
Returns:
list(int): List of 64-bit hashed representation of key hashes
list(int): list of 64-bit hashed representation of key hashes
Note:
Returns the upper-most 64 bits"""
return sha256(key).digest() # type: ignore
Loading

0 comments on commit 869c627

Please sign in to comment.