-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
18 changed files
with
153 additions
and
145 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,22 @@ | ||
""" BloomFilter and BloomFiter on Disk, python implementation | ||
License: MIT | ||
Author: Tyler Barrus ([email protected]) | ||
URL: https://github.com/barrust/bloom | ||
"""BloomFilter and BloomFiter on Disk, python implementation | ||
License: MIT | ||
Author: Tyler Barrus ([email protected]) | ||
URL: https://github.com/barrust/bloom | ||
""" | ||
|
||
import math | ||
import os | ||
from array import array | ||
from binascii import hexlify, unhexlify | ||
from io import BytesIO, IOBase | ||
from collections.abc import ByteString | ||
from io import BufferedRandom, BytesIO, IOBase | ||
from mmap import mmap | ||
from numbers import Number | ||
from pathlib import Path | ||
from shutil import copyfile | ||
from struct import Struct | ||
from textwrap import wrap | ||
from typing import ByteString, Tuple, Union | ||
from typing import Union | ||
|
||
from probables.exceptions import InitializationError, NotSupportedError | ||
from probables.hashes import HashFuncT, HashResultsT, KeyT, default_fnv_1a | ||
|
@@ -307,10 +309,7 @@ def export_c_header(self, filename: Union[str, Path]) -> None: | |
Args: | ||
filename (str): The filename to which the Bloom Filter will be written.""" | ||
data = (" " + line for line in wrap(", ".join(f"0x{e:02x}" for e in bytearray.fromhex(self.export_hex())), 80)) | ||
if self._type in ["regular", "regular-on-disk"]: | ||
bloom_type = "standard BloomFilter" | ||
else: | ||
bloom_type = "CountingBloomFilter" | ||
bloom_type = "standard BloomFilter" if self._type in {"regular", "regular-on-disk"} else "CountingBloomFilter" | ||
|
||
with open(filename, "w", encoding="utf-8") as file: | ||
print(f"/* BloomFilter Export of a {bloom_type} */", file=file) | ||
|
@@ -465,7 +464,7 @@ def jaccard_index(self, second: SimpleBloomT) -> Union[float, None]: | |
|
||
# More private functions | ||
@classmethod | ||
def _get_optimized_params(cls, estimated_elements: int, false_positive_rate: float) -> Tuple[float, int, int]: | ||
def _get_optimized_params(cls, estimated_elements: int, false_positive_rate: float) -> tuple[float, int, int]: | ||
valid_prms = isinstance(estimated_elements, Number) and estimated_elements > 0 | ||
if not valid_prms: | ||
msg = "Bloom: estimated elements must be greater than 0" | ||
|
@@ -528,15 +527,16 @@ def _load( | |
else: | ||
offset = self._FOOTER_STRUCT.size | ||
est_els, els_added, fpr, n_hashes, n_bits = self._parse_footer( | ||
self._FOOTER_STRUCT, file[-1 * offset :] # type: ignore | ||
self._FOOTER_STRUCT, | ||
file[-1 * offset :], # type: ignore | ||
) | ||
self._set_values(est_els, fpr, n_hashes, n_bits, hash_function) | ||
# now read in the bit array! | ||
self._parse_bloom_array(file, self._IMPT_STRUCT.size * self.bloom_length) # type: ignore | ||
self._els_added = els_added | ||
|
||
@classmethod | ||
def _parse_footer(cls, stct: Struct, d: ByteString) -> Tuple[int, int, float, int, int]: | ||
def _parse_footer(cls, stct: Struct, d: ByteString) -> tuple[int, int, float, int, int]: | ||
"""parse footer returning the data: estimated elements, elements added, | ||
false positive rate, hash function, number hashes, number bits""" | ||
e_elms, e_added, fpr = stct.unpack_from(bytearray(d)) | ||
|
@@ -568,9 +568,7 @@ def _verify_bloom_similarity(self, second: SimpleBloomT) -> bool: | |
hash_match = self.number_hashes != second.number_hashes | ||
same_bits = self.number_bits != second.number_bits | ||
next_hash = self.hashes("test") != second.hashes("test") | ||
if hash_match or same_bits or next_hash: | ||
return False | ||
return True | ||
return not (hash_match or same_bits or next_hash) | ||
|
||
|
||
class BloomFilterOnDisk(BloomFilter): | ||
|
@@ -607,7 +605,7 @@ def __init__( | |
) -> None: | ||
# set some things up | ||
self._filepath = resolve_path(filepath) | ||
self.__file_pointer = None | ||
self.__file_pointer: BufferedRandom | None = None | ||
super().__init__(est_elements, false_positive_rate, filepath, hex_string, hash_function) | ||
|
||
def _load_init(self, filepath, hash_function, hex_string, est_elements, false_positive_rate): | ||
|
@@ -642,7 +640,7 @@ def close(self) -> None: | |
"""Clean up the BloomFilterOnDisk object""" | ||
if self.__file_pointer is not None and not self.__file_pointer.closed: | ||
self.__update() | ||
self._bloom.close() | ||
self._bloom.close() # type: ignore | ||
self.__file_pointer.close() | ||
self.__file_pointer = None | ||
|
||
|
@@ -671,7 +669,7 @@ def _load(self, file: Union[str, Path], hash_function: Union[HashFuncT, None] = | |
fpr, n_hashes, n_bits = self._get_optimized_params(est_els, fpr) | ||
self._set_values(est_els, fpr, n_hashes, n_bits, hash_function) | ||
# setup a few additional items | ||
self.__file_pointer = open(file, "r+b") # type: ignore | ||
self.__file_pointer = open(file, "r+b") # noqa: SIM115 | ||
self._bloom = mmap(self.__file_pointer.fileno(), 0) # type: ignore | ||
self._on_disk = True | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,16 @@ | ||
""" Expanding and Rotating BloomFilter, python implementations | ||
License: MIT | ||
Author: Tyler Barrus ([email protected]) | ||
URL: https://github.com/barrust/pyprobables | ||
"""Expanding and Rotating BloomFilter, python implementations | ||
License: MIT | ||
Author: Tyler Barrus ([email protected]) | ||
URL: https://github.com/barrust/pyprobables | ||
""" | ||
|
||
from array import array | ||
from collections.abc import ByteString | ||
from io import BytesIO, IOBase | ||
from mmap import mmap | ||
from pathlib import Path | ||
from struct import Struct | ||
from typing import ByteString, Tuple, Union | ||
from typing import Union | ||
|
||
from probables.blooms.bloom import BloomFilter | ||
from probables.exceptions import RotatingBloomFilterError | ||
|
@@ -144,6 +145,7 @@ def check_alt(self, hashes: HashResultsT) -> bool: | |
hashes (list): The hash representation to check for in the Bloom Filter | ||
Returns: | ||
bool: `True` if the element is likely present; `False` if definately not present""" | ||
# return any(.check_alt(hashes)) | ||
for blm in self._blooms: | ||
if blm.check_alt(hashes): | ||
return True | ||
|
@@ -224,7 +226,7 @@ def __load(self, file: Union[Path, str, IOBase, mmap]): | |
self._parse_blooms(file, size) # type:ignore | ||
|
||
@classmethod | ||
def _parse_footer(cls, b: ByteString) -> Tuple[int, int, int, float]: | ||
def _parse_footer(cls, b: ByteString) -> tuple[int, int, int, float]: | ||
offset = cls.__FOOTER_STRUCT.size | ||
size, est_els, els_added, fpr = cls.__FOOTER_STRUCT.unpack(bytes(b[-1 * offset :])) | ||
return int(size), int(est_els), int(els_added), float(fpr) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,18 @@ | ||
""" Count-Min Sketch, Heavy Hitters, and Stream Threshold, python implementations | ||
License: MIT | ||
Author: Tyler Barrus ([email protected]) | ||
URL: https://github.com/barrust/count-min-sketch | ||
"""Count-Min Sketch, Heavy Hitters, and Stream Threshold, python implementations | ||
License: MIT | ||
Author: Tyler Barrus ([email protected]) | ||
URL: https://github.com/barrust/count-min-sketch | ||
""" | ||
|
||
import math | ||
from array import array | ||
from collections.abc import ByteString | ||
from io import BytesIO, IOBase | ||
from mmap import mmap | ||
from numbers import Number | ||
from pathlib import Path | ||
from struct import Struct | ||
from typing import ByteString, Dict, Tuple, Union | ||
from typing import Union | ||
|
||
from probables.constants import INT32_T_MAX, INT32_T_MIN, INT64_T_MAX, INT64_T_MIN | ||
from probables.exceptions import CountMinSketchError, InitializationError, NotSupportedError | ||
|
@@ -408,7 +409,7 @@ def __load(self, file: Union[Path, str, IOBase, mmap]): | |
self._parse_bytes(file) # type: ignore | ||
|
||
@classmethod | ||
def _parse_footer(cls, file: ByteString) -> Tuple[int, int, int]: | ||
def _parse_footer(cls, file: ByteString) -> tuple[int, int, int]: | ||
"""return width, depth and elements added, in that order""" | ||
offset = cls.__FOOTER_STRUCT.size | ||
width, depth, elements_added = cls.__FOOTER_STRUCT.unpack_from(bytes(file[-1 * offset :])) | ||
|
@@ -599,7 +600,7 @@ def __str__(self) -> str: | |
) | ||
|
||
@property | ||
def heavy_hitters(self) -> Dict[str, int]: | ||
def heavy_hitters(self) -> dict[str, int]: | ||
"""dict: Return the heavy hitters, or most common elements | ||
Note: | ||
|
@@ -758,7 +759,7 @@ def __str__(self) -> str: | |
) | ||
|
||
@property | ||
def meets_threshold(self) -> Dict[str, int]: | ||
def meets_threshold(self) -> dict[str, int]: | ||
"""dict: Those keys that meet the required threshold (with value)""" | ||
return self.__meets_threshold | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,16 @@ | ||
""" Counting Cuckoo Filter, python implementation | ||
License: MIT | ||
Author: Tyler Barrus ([email protected]) | ||
"""Counting Cuckoo Filter, python implementation | ||
License: MIT | ||
Author: Tyler Barrus ([email protected]) | ||
""" | ||
|
||
import random | ||
from array import array | ||
from collections.abc import ByteString | ||
from io import IOBase | ||
from mmap import mmap | ||
from pathlib import Path | ||
from struct import Struct | ||
from typing import ByteString, List, Union | ||
from typing import Union | ||
|
||
from probables.cuckoo.cuckoo import CuckooFilter | ||
from probables.exceptions import CuckooFilterFullError | ||
|
@@ -135,17 +136,15 @@ def frombytes( | |
|
||
def __contains__(self, val: KeyT) -> bool: | ||
"""setup the `in` keyword""" | ||
if self.check(val) > 0: | ||
return True | ||
return False | ||
return self.check(val) > 0 | ||
|
||
@property | ||
def unique_elements(self) -> int: | ||
"""int: unique number of elements inserted""" | ||
return self.__unique_elements | ||
|
||
@property | ||
def buckets(self) -> List[List["CountingCuckooBin"]]: # type: ignore | ||
def buckets(self) -> list[list["CountingCuckooBin"]]: # type: ignore | ||
"""list(list): The buckets holding the fingerprints | ||
Note: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,18 @@ | ||
""" Cuckoo Filter, python implementation | ||
License: MIT | ||
Author: Tyler Barrus ([email protected]) | ||
"""Cuckoo Filter, python implementation | ||
License: MIT | ||
Author: Tyler Barrus ([email protected]) | ||
""" | ||
|
||
import math | ||
import random | ||
from array import array | ||
from collections.abc import ByteString | ||
from io import BytesIO, IOBase | ||
from mmap import mmap | ||
from numbers import Number | ||
from pathlib import Path | ||
from struct import Struct | ||
from typing import ByteString, List, Tuple, Union | ||
from typing import Union | ||
|
||
from probables.exceptions import CuckooFilterFullError, InitializationError | ||
from probables.hashes import KeyT, SimpleHashT, fnv_1a | ||
|
@@ -226,7 +227,7 @@ def bucket_size(self) -> int: | |
return self._bucket_size | ||
|
||
@property | ||
def buckets(self) -> List[List[int]]: | ||
def buckets(self) -> list[list[int]]: | ||
"""list(list): The buckets holding the fingerprints | ||
Note: | ||
|
@@ -312,9 +313,7 @@ def check(self, key: KeyT) -> bool: | |
bool: True if likely present, False if definately not""" | ||
idx_1, idx_2, fingerprint = self._generate_fingerprint_info(key) | ||
is_present = self._check_if_present(idx_1, idx_2, fingerprint) | ||
if is_present is not None: | ||
return True | ||
return False | ||
return is_present is not None | ||
|
||
def remove(self, key: KeyT) -> bool: | ||
"""Remove an element from the filter | ||
|
@@ -491,7 +490,7 @@ def _indicies_from_fingerprint(self, fingerprint): | |
idx_2 = self.__hash_func(str(fingerprint)) % self.capacity | ||
return idx_1, idx_2 | ||
|
||
def _generate_fingerprint_info(self, key: KeyT) -> Tuple[int, int, int]: | ||
def _generate_fingerprint_info(self, key: KeyT) -> tuple[int, int, int]: | ||
"""Generate the fingerprint and indicies using the provided key | ||
Args: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.