Skip to content

Commit

Permalink
Add/feature(repeats+double_runs) (#45)
Browse files Browse the repository at this point in the history
* implement `double_runs` property

* add testcase for double_runs

* implement `repeats` method

* add testcase for repeats function

* `CHANGELOG.md` updated

* `README.md` updated

* use `VALID_BASES` and refactor `double_runs` by using `repeats` method

* add double_runs to cache test
  • Loading branch information
AHReccese authored Jan 28, 2025
1 parent adce824 commit a11b05f
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## [Unreleased]
### Added
- `double_runs` property
- `repeats` method
- `name` property
### Changed
- Test system modified
Expand Down
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,20 @@
>>> primer1.single_runs
{'A': 2, 'T': 0, 'C': 0, 'G': 2}
```
#### Double run length
```pycon
>>> primer1.double_runs
{'AT': 0, 'AG': 4, 'AC': 0, 'TA': 0, 'TG': 0, 'TC': 0, 'GA': 5, 'GT': 0, 'GC': 0, 'CA': 0, 'CT': 0, 'CG': 0}
```
#### Repeats
```pycon
>>> primer1.repeats(sequence="GG", consecutive=False)
4
```
```pycon
>>> primer1.repeats(sequence="GG", consecutive=True)
0
```
#### Melting temperature
```pycon
>>> primer1.melting_temperature()
Expand Down
40 changes: 40 additions & 0 deletions opr/primer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
"""OPR primer."""
import re
import itertools
from enum import Enum
from warnings import warn
from .errors import OPRBaseError
Expand Down Expand Up @@ -44,6 +46,7 @@ def __init__(self, sequence, name=DEFAULT_PRIMER_NAME):
self._gc_content = None
self._gc_clamp = None
self._single_runs = None
self._double_runs = None
self._melting_temperature = {
MeltingTemperature.BASIC: None,
MeltingTemperature.SALT_ADJUSTED: None,
Expand Down Expand Up @@ -225,6 +228,43 @@ def single_runs(self):
self._single_runs[base] = single_run_length(self._sequence, base)
return self._single_runs

@property
def double_runs(self):
    """
    Double runs of the primer (computed on first access, then cached).

    For every ordered pair of two different bases, the stored value is the
    result of `repeats(pair, consecutive=True)` on this primer.

    :return: dictionary mapping each 2-base pair to its consecutive repeat count
    """
    # Serve the cached dictionary when it has already been built.
    if self._double_runs is not None:
        return self._double_runs
    self._double_runs = {
        first + second: self.repeats(first + second, consecutive=True)
        for first, second in itertools.product(VALID_BASES, repeat=2)
        if first != second
    }
    return self._double_runs

def repeats(self, sequence, consecutive=False):
    """
    Count occurrences of a subsequence in the primer sequence.

    :param sequence: the subsequence to look for inside the primer
    :type sequence: str
    :param consecutive: if True, report the longest back-to-back run of the
        subsequence (in repeat units) instead of the total occurrence count
    :type consecutive: bool
    :return: with consecutive=False, the number of non-overlapping occurrences;
        with consecutive=True, the repeat count of the longest run found, where
        a single isolated occurrence (or an empty subsequence) is reported as 0
    """
    if not consecutive:
        return self.sequence.count(sequence)
    if not sequence:
        # An empty subsequence has no run length; also avoids dividing by zero below.
        return 0
    # A single greedy non-capturing group is enough: every match returned by
    # findall is one run of back-to-back copies of the subsequence.
    runs = re.findall(f"(?:{re.escape(sequence)})+", self.sequence)
    longest = max((len(run) // len(sequence) for run in runs), default=0)
    # One lone occurrence is not a repeat.
    return longest if longest > 1 else 0

def melting_temperature(self, method=MeltingTemperature.BASIC):
"""
Calculate(if needed) the melting temperature.
Expand Down
12 changes: 12 additions & 0 deletions tests/test_cache.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import itertools
from opr.params import VALID_BASES
from opr import Primer, MeltingTemperature

TEST_CASE_NAME = "Cache tests"
Expand Down Expand Up @@ -32,3 +34,13 @@ def test_single_runs():
runs = oprimer.single_runs
assert oprimer.single_runs['A'] == runs['A'] and oprimer.single_runs['T'] == runs[
'T'] and oprimer.single_runs['C'] == runs['C'] and oprimer.single_runs['G'] == runs['G']


def test_double_runs():
    primer = Primer("ATATCGAACACACACACA")
    first_read = primer.double_runs
    # Read the property again for every pair and compare with the first read.
    second_read = {
        ''.join(bases): primer.double_runs[''.join(bases)]
        for bases in itertools.product(VALID_BASES, repeat=2)
        if bases[0] != bases[1]
    }
    assert len(second_read) == len(first_read)
    assert all(first_read[pair] == second_read[pair] for pair in first_read)
32 changes: 32 additions & 0 deletions tests/test_calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,35 @@ def test_single_runs_3(): # Reference: https://www.oligoevaluator.com/OligoCalc
oprimer = Primer("AAAAATTCGGGGATCCCCG")
runs = oprimer.single_runs
assert runs['A'] == 5 and runs['T'] == 2 and runs['C'] == 4 and runs['G'] == 4


def test_double_runs():
    # Expected values counted by hand: "ATAT" gives AT=2, and the trailing
    # "ACACACACACA" gives AC=5 and CA=5; every other pair occurs at most once
    # in a row and is therefore reported as 0.
    p1 = Primer("ATATCGAACACACACACA")
    true_answer = {
        'GT': 0, 'CA': 5, 'AT': 2, 'TA': 0, 'GC': 0, 'GA': 0,
        'AG': 0, 'TG': 0, 'CG': 0, 'TC': 0, 'AC': 5, 'CT': 0,
    }
    # Direct dict comparison checks keys and values at once and lets the
    # test runner show a readable diff on failure (no debug print needed).
    assert p1.double_runs == true_answer


def test_repeats_1():
    primer = Primer("ATCG")
    # Plain occurrence counts.
    assert primer.repeats(sequence="A", consecutive=False) == 1
    assert primer.repeats(sequence="AT", consecutive=False) == 1
    assert primer.repeats(sequence="AC", consecutive=False) == 0
    # A subsequence that appears only once is not a consecutive repeat.
    assert primer.repeats(sequence="A", consecutive=True) == 0
    assert primer.repeats(sequence="AT", consecutive=True) == 0


def test_repeats_2():
    primer = Primer("AAAATCGTGT")
    # "AAAA" is two back-to-back "AA"; "GTGT" is two back-to-back "GT".
    for unit in ("AA", "GT"):
        assert primer.repeats(sequence=unit, consecutive=True) == 2


def test_repeats_3():
    # The whole primer is three back-to-back copies of "ATCG".
    longest_run = Primer("ATCGATCGATCG").repeats(sequence="ATCG", consecutive=True)
    assert longest_run == 3

0 comments on commit a11b05f

Please sign in to comment.