Skip to content

Commit

Permalink
bumped version and updated project structure
Browse files Browse the repository at this point in the history
  • Loading branch information
hitblast committed Jul 4, 2024
1 parent 91942f5 commit 3f4211f
Show file tree
Hide file tree
Showing 9 changed files with 301 additions and 285 deletions.
2 changes: 1 addition & 1 deletion avro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
from .main import *

# Version information.
__version_info__ = (2024, 7, 1)
__version_info__ = (2024, 7, 5)
__version__ = ".".join(map(str, __version_info__))
292 changes: 13 additions & 279 deletions avro/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,16 @@


# Import first-party Python libraries.
import contextlib
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import lru_cache
from itertools import chain
from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union
from typing import Callable, Generator, List, Tuple, Union

# Import local modules.
from . import config
from .utils import validate
from .utils import processor, validate

# Constants.
PATTERNS = config.DICT["avro"]["patterns"]
NON_RULE_PATTERNS = [p for p in PATTERNS if "rules" not in p]
RULE_PATTERNS = [p for p in PATTERNS if "rules" in p]
# Import local modules.
from .utils.config import BIJOY_MAP


# The helper function for handling multithreaded workloads.
Expand Down Expand Up @@ -65,7 +60,7 @@ def _parse_backend(text: str) -> str:

# Replace predefined exceptions in the input text.
if remap_words:
fixed_text, manual_required = _find_in_remap(fixed_text)
fixed_text, manual_required = processor.find_in_remap(fixed_text)

def output_generator() -> Generator[str, None, None]:
nonlocal cur_end
Expand All @@ -79,19 +74,19 @@ def output_generator() -> Generator[str, None, None]:
yield i

elif cur >= cur_end and uni_pass:
match = _match_patterns(fixed_text, cur, rule=False)
match = processor.match_patterns(fixed_text, cur, rule=False)
matched = match["matched"]

if matched:
yield match["replaced"]
cur_end = cur + len(match["found"])
else:
match = _match_patterns(fixed_text, cur, rule=True)
match = processor.match_patterns(fixed_text, cur, rule=True)
matched = match["matched"]

if matched:
cur_end = cur + len(match["found"])
replaced = _process_rules(
replaced = processor.process_rules(
rules=match["rules"], fixed_text=fixed_text, cur=cur, cur_end=cur_end
)

Expand Down Expand Up @@ -135,10 +130,10 @@ def to_bijoy(*texts: str) -> Union[str, List[str]]:

@lru_cache(maxsize=128)
def _convert_backend(text: str) -> str:
text = _rearrange_unicode_text(re.sub("ৌ", "ৌ", re.sub("ো", "ো", text)))
text = processor.rearrange_unicode_text(re.sub("ৌ", "ৌ", re.sub("ো", "ো", text)))

for unic in config.BIJOY_MAP:
text = re.sub(unic, config.BIJOY_MAP[unic], text)
for unic in BIJOY_MAP:
text = re.sub(unic, BIJOY_MAP[unic], text)

return text.strip()

Expand Down Expand Up @@ -173,14 +168,14 @@ def _reverse_backend(text: str) -> str:

# Replace predefined exceptions in the input text.
if remap_words:
text, manual_required = _find_in_remap(text, reversed=True)
text, manual_required = processor.find_in_remap(text, reversed=True)

# Iterate through input text.
def output_generator() -> Generator[str, None, None]:
for cur, i in enumerate(text):
try:
i.encode("utf-8")
match = _match_patterns(text, cur, rule=False, reversed=True)
match = processor.match_patterns(text, cur, rule=False, reversed=True)

yield (match["reversed"] or match["found"]) if match["matched"] else i

Expand All @@ -202,264 +197,3 @@ def _reverse_backend_ext(text: str) -> str:
# Prepare final output.
output = _concurrency_helper(_reverse_backend_ext, texts)
return output[0] if len(output) == 1 else output


def _rearrange_unicode_text(text: str) -> str:
    """
    Rearranges Unicode (Avro) text to match conversion standards for ASCII.
    Returns the rearranged string.

    The input is turned into a list of characters which is scanned once from
    left to right and edited in place by two branches:

    1. If the current character satisfies `validate.is_bangla_prekar`, it is
       swapped with an earlier character located by stepping backwards over
       (halant, consonant) pairs.
    2. If the current character is a halant preceded by "র" (and the
       character before that is not a halant), the characters that follow
       are inspected and a multi-slice reassignment is attempted.

    NOTE(review): the indentation of this block was reconstructed; confirm
    the nesting (especially which `if` the final `else: break` belongs to)
    against the real module.
    """

    # Convert the string to a list of individual characters.
    chars = list(text)
    length = len(chars)
    # Exclusive lower bound for the backward scan in the pre-kar branch;
    # it is advanced past every position that has already been rearranged.
    barrier = 0

    for i in range(length):
        if validate.is_bangla_prekar(chars[i]):
            j = 1

            # Walk back two characters at a time while chars[i - j] is a
            # consonant that is itself preceded by a halant, never reaching
            # the barrier index.
            while (
                i - j >= 0
                and i - j > barrier
                and validate.is_bangla_banjonborno(chars[i - j])
                and validate.is_bangla_halant(chars[i - j - 1])
            ):
                j += 2

            # Exchange the pre-kar with the character j positions before it.
            # NOTE(review): when i == 0 the index i - j is -1, which Python
            # resolves to the last element of the list — confirm intended.
            chars[i - j], chars[i] = chars[i], chars[i - j]
            barrier = i + 1

        # NOTE(review): chars[i - 1] and chars[i - 2] are not bounds-checked;
        # for i < 2 they wrap around to the end of the list.
        if (
            i < length - 1
            and validate.is_bangla_halant(chars[i])
            and chars[i - 1] == "র"
            and not validate.is_bangla_halant(chars[i - 2])
        ):
            j = 1
            found_pre_kar = 0

            # Skip over consonant + halant pairs that follow; stop when a
            # consonant is followed by a pre-kar (remembered in found_pre_kar)
            # or by anything else.
            # NOTE(review): chars[i + j] / chars[i + j + 1] can run past the
            # end of the list (IndexError), and with this nesting a
            # non-consonant at chars[i + j] takes no branch, so the loop
            # never exits — confirm against the real module.
            while True:
                if validate.is_bangla_banjonborno(chars[i + j]):
                    if validate.is_bangla_halant(chars[i + j + 1]):
                        j += 2
                    elif validate.is_bangla_prekar(chars[i + j + 1]):
                        found_pre_kar = 1
                        break
                    else:
                        break

            # NOTE(review): there are four assignment targets on the left but
            # five values on the right, so as written this statement raises
            # ValueError when reached — the intended splice needs confirming.
            chars[i - 1], chars[i], chars[i + 1 : i + j + found_pre_kar + 1], chars[i + j + 1 :] = (
                chars[i + j + 1],
                chars[i + 1 : i + j + 1],
                chars[i - 1],
                chars[i],
                chars[i + j + found_pre_kar + 1 :],
            )
            # NOTE(review): rebinding `i` does not skip iterations of a
            # `for ... in range(...)` loop; the next iteration overwrites it.
            i += j + found_pre_kar
            barrier = i + 1

    return "".join(chars)


@lru_cache(maxsize=128)
def _find_in_remap(text: str, *, reversed: bool = False) -> Tuple[str, bool]:
    """
    Applies the entries of `config.AVRO_EXCEPTIONS` to the given text.

    In forward mode every lower-cased exception value found in the text is
    replaced by its key; in reversed mode every key found in the text is
    replaced by its value. Presence is tested case-insensitively, while the
    replacement itself is a plain (case-sensitive) `str.replace`.

    Returns a tuple of two elements:
    - (`str`): The remapped text.
    - (`bool`): Whether manual intervention is required, i.e. whether at
      least one whitespace-separated word is identical, position by
      position, before and after remapping.
    """

    original = text

    for key, value in config.AVRO_EXCEPTIONS.items():
        if reversed:
            if key.lower() in text.lower():
                text = text.replace(key, value)
        else:
            # The lower-cased value is used both for the lookup and as the
            # substring that gets replaced.
            lowered = value.lower()
            if lowered in text.lower():
                text = text.replace(lowered, key)

    word_pairs = zip(text.split(), original.split())
    manual_required = any(after == before for after, before in word_pairs)

    return (text, manual_required)


def _match_patterns(
    fixed_text: str, cur: int = 0, rule: bool = False, reversed: bool = False
) -> Dict[str, Any]:
    """
    Matches the text at the cursor position against either the rule
    patterns (`rule=True`) or the non-rule patterns (`rule=False`).

    On a hit the first matching pattern is used and the result holds the
    keys "matched", "found", "replaced", "reversed" and "rules"; "reversed"
    is only computed for non-rule lookups and "rules" only for rule lookups,
    the other one being None. On a miss the result holds "matched" (False),
    "found" (None), "replaced" (the character at the cursor) and "rules"
    (None).
    """

    candidates = RULE_PATTERNS if rule else NON_RULE_PATTERNS
    hits = _exact_find_in_pattern(fixed_text, reversed, cur, candidates)

    # Nothing matched: hand the current character back untouched.
    if not hits:
        return {
            "matched": False,
            "found": None,
            "replaced": fixed_text[cur],
            "rules": None,
        }

    first = hits[0]

    if rule:
        reverse_value = None
        rules_value = first.get("rules")
    else:
        reverse_value = _reverse_with_rules(cur, fixed_text, first.get("reverse"))
        rules_value = None

    return {
        "matched": True,
        "found": first.get("find"),
        "replaced": first.get("replace"),
        "reversed": reverse_value,
        "rules": rules_value,
    }


def _exact_find_in_pattern(
    fixed_text: str, reversed: bool, cur: int = 0, patterns: Any = PATTERNS
) -> List[Dict[str, Any]]:
    """
    Collects, in their original order, the pattern items whose key string
    occurs in the text exactly at the cursor position.

    In reversed mode the "replace" entry of each pattern is compared;
    otherwise the "find" entry is compared and patterns with a missing or
    empty "find" are skipped.
    """

    text_length = len(fixed_text)

    def sits_at_cursor(needle: str) -> bool:
        # The needle must fit before the end of the text and equal the
        # slice that starts at the cursor.
        stop = cur + len(needle)
        return stop <= text_length and needle == fixed_text[cur:stop]

    found: List[Dict[str, Any]] = []

    for entry in patterns:
        if reversed:
            if sits_at_cursor(entry["replace"]):
                found.append(entry)
        elif entry.get("find", None) and sits_at_cursor(entry["find"]):
            found.append(entry)

    return found


def _reverse_with_rules(cursor: int, fixed_text: str, text_reversed: str) -> str:
    """
    Decides whether the reverse-parsed fragment needs a trailing "o".

    The suffix "o" is chosen when the character at the cursor is not in
    `config.AVRO_KAR`, `config.AVRO_SHORBORNO` or `config.AVRO_IGNORE` and
    is not the last character of the text. It is dropped again when the
    next character is in `config.AVRO_KAR`, or when the one after that is
    and the cursor is not at position 0. A falsy `text_reversed` is
    returned unchanged.
    """

    current = fixed_text[cursor]

    if (
        current in config.AVRO_KAR
        or current in config.AVRO_SHORBORNO
        or current in config.AVRO_IGNORE
        or len(fixed_text) == cursor + 1
    ):
        suffix = ""
    else:
        suffix = "o"

    # Look ahead; running off the end of the text simply means there is
    # nothing further to check.
    try:
        if fixed_text[cursor + 1] in config.AVRO_KAR or (
            fixed_text[cursor + 2] in config.AVRO_KAR and cursor != 0
        ):
            suffix = ""
    except IndexError:
        pass

    if not text_reversed:
        return text_reversed

    return text_reversed + suffix


def _process_rules(rules: Dict[str, Any], fixed_text: str, cur: int = 0, cur_end: int = 1) -> Optional[str]:
    """
    Process rules matched in pattern and returns suitable replacement.

    The rules are tried in order. A rule applies when it has at least one
    entry in its "matches" list and every entry is accepted by
    `_process_match`; evaluation of a rule stops at its first rejected
    entry. The "replace" value of the first applicable rule is returned.

    Returns None when no rule applies, including when `rules` is empty
    (previously an empty `rules` raised UnboundLocalError because the
    match flag was only assigned inside the loop).
    """

    for rule in rules:
        conditions = rule["matches"]

        # A rule without any conditions never fires; `all()` alone would
        # treat an empty list as satisfied, hence the explicit check.
        if conditions and all(
            _process_match(condition, fixed_text, cur, cur_end) for condition in conditions
        ):
            return rule["replace"]

    return None


def _process_match(match: Any, fixed_text: str, cur: int, cur_end: int) -> bool:
    """
    Evaluates a single condition ("match") of a rule.

    `match["type"]` selects which side of the current span is inspected:
    "prefix" looks at the character just before `cur`, anything else at the
    character at `cur_end`. `match["scope"]` names the test ("punctuation",
    "vowel", "consonant" or "exact"); a leading "!" negates it. Any other
    scope is accepted unconditionally.

    Returns True when the condition is satisfied.
    """

    kind = match["type"]

    # Index of the neighbouring character to examine.
    chk = cur - 1 if kind == "prefix" else cur_end

    # Split an optional leading "!" off the scope.
    scope = match["scope"]
    negative = scope.startswith("!")
    if negative:
        scope = scope[1:]

    if scope == "punctuation":
        # Falling off either end of the text counts as punctuation.
        hit = (
            (chk < 0 and kind == "prefix")
            or (chk >= len(fixed_text) and kind == "suffix")
            or validate.is_punctuation(fixed_text[chk])
        )
        return hit != negative

    if scope in ("vowel", "consonant"):
        classify = validate.is_vowel if scope == "vowel" else validate.is_consonant
        # The neighbour must exist on the inspected side before it is
        # classified.
        in_bounds = (chk >= 0 and kind == "prefix") or (
            chk < len(fixed_text) and kind == "suffix"
        )
        hit = in_bounds and classify(fixed_text[chk])
        return hit != negative

    if scope == "exact":
        value = match["value"]

        if kind == "prefix":
            exact_start, exact_end = cur - len(value), cur
        else:
            exact_start, exact_end = cur_end, cur_end + len(value)

        return bool(validate.is_exact(value, fixed_text, exact_start, exact_end, negative))

    # Unknown scopes do not block the rule.
    return True
6 changes: 6 additions & 0 deletions avro/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
# SPDX-License-Identifier: MIT

"""
Core utility package for avro.py
Licensed under: MIT License
"""
2 changes: 1 addition & 1 deletion avro/config.py → avro/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


# Import local modules.
from .resources import DICT
from ..resources import DICT

# Shortcuts to vowels, constants, case-sensitives and numbers.
AVRO_VOWELS = set(DICT["avro"]["vowel"])
Expand Down
2 changes: 1 addition & 1 deletion avro/utils/count.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


# Import local modules.
from avro import config
from avro.utils import config


# Functions.
Expand Down
Loading

0 comments on commit 3f4211f

Please sign in to comment.