diff --git a/CHANGELOG.md b/CHANGELOG.md index 71689ad..f061262 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Added +- `double_runs` property +- `repeats` method - `name` property ### Changed - Test system modified diff --git a/README.md b/README.md index 62a4ee9..56fc97a 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,20 @@ >>> primer1.single_runs {'A': 2, 'T': 0, 'C': 0, 'G': 2} ``` +#### Double run length +```pycon +>>> primer1.double_runs +{'AT': 0, 'AG': 4, 'AC': 0, 'TA': 0, 'TG': 0, 'TC': 0, 'GA': 5, 'GT': 0, 'GC': 0, 'CA': 0, 'CT': 0, 'CG': 0} +``` +#### Repeats +```pycon +>>> primer1.repeats(sequence="GG", consecutive=False) +4 +``` +```pycon +>>> primer1.repeats(sequence="GG", consecutive=True) +0 +``` #### Melting temperature ```pycon >>> primer1.melting_temperature() diff --git a/opr/primer.py b/opr/primer.py index bdb2c53..de44382 100644 --- a/opr/primer.py +++ b/opr/primer.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- """OPR primer.""" +import re +import itertools from enum import Enum from warnings import warn from .errors import OPRBaseError @@ -44,6 +46,7 @@ def __init__(self, sequence, name=DEFAULT_PRIMER_NAME): self._gc_content = None self._gc_clamp = None self._single_runs = None + self._double_runs = None self._melting_temperature = { MeltingTemperature.BASIC: None, MeltingTemperature.SALT_ADJUSTED: None, @@ -225,6 +228,43 @@ def single_runs(self): self._single_runs[base] = single_run_length(self._sequence, base) return self._single_runs + @property + def double_runs(self): + """ + Calculate Double Runs of the primer. + + It refers to how many times each 2-base pair occurs consecutively in the primer. 
+ + :return: Dictionary of double runs (2-base pairs) and their counts in the primer + """ + if self._double_runs is None: + pairs = [''.join(pair) for pair in itertools.product(VALID_BASES, repeat=2) if pair[0] != pair[1]] + counts = {pair: 0 for pair in pairs} + for pair in counts: + counts[pair] = self.repeats(pair, consecutive=True) + self._double_runs = counts + return self._double_runs + + def repeats(self, sequence, consecutive=False): + """ + Count occurrences of a subsequence in the primer sequence. + + :param sequence: The subsequence to search for in the primer sequence. + :type sequence: str + :param consecutive: Whether to count only consecutive (back-to-back) repeats. + :type consecutive: bool + :return: The count of occurrences; if consecutive is True, the longest run of back-to-back repeats (0 when the subsequence never repeats). + """ + if consecutive: + pattern = f"(?:{re.escape(sequence)})+" + matches = re.findall(f"({pattern})+", self.sequence) + result = max((len(match) // len(sequence) for match in matches), default=0) + if result == 1: + result = 0 + return result + else: + return self.sequence.count(sequence) + def melting_temperature(self, method=MeltingTemperature.BASIC): """ Calculate(if needed) the melting temperature. 
diff --git a/tests/test_cache.py b/tests/test_cache.py index d1edff9..46f08f3 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -1,3 +1,5 @@ +import itertools +from opr.params import VALID_BASES from opr import Primer, MeltingTemperature TEST_CASE_NAME = "Cache tests" @@ -32,3 +34,13 @@ def test_single_runs(): runs = oprimer.single_runs assert oprimer.single_runs['A'] == runs['A'] and oprimer.single_runs['T'] == runs[ 'T'] and oprimer.single_runs['C'] == runs['C'] and oprimer.single_runs['G'] == runs['G'] + + +def test_double_runs(): + p1 = Primer("ATATCGAACACACACACA") + double_runs = p1.double_runs + pairs = [''.join(pair) for pair in itertools.product(VALID_BASES, repeat=2) if pair[0] != pair[1]] + double_runs_2nd = {} + for pair in pairs: + double_runs_2nd[pair] = p1.double_runs[pair] + assert len(double_runs_2nd) == len(double_runs) and all(double_runs[pair] == double_runs_2nd[pair] for pair in double_runs) diff --git a/tests/test_calculations.py b/tests/test_calculations.py index ebbe542..fe4bf71 100644 --- a/tests/test_calculations.py +++ b/tests/test_calculations.py @@ -66,3 +66,35 @@ def test_single_runs_3(): # Reference: https://www.oligoevaluator.com/OligoCalc oprimer = Primer("AAAAATTCGGGGATCCCCG") runs = oprimer.single_runs assert runs['A'] == 5 and runs['T'] == 2 and runs['C'] == 4 and runs['G'] == 4 + + +def test_double_runs(): + p1 = Primer("ATATCGAACACACACACA") + double_runs = p1.double_runs + print(double_runs) + true_answer = {'GT': 0, 'CA': 5, 'AT': 2, 'TA': 0, 'GC': 0, 'GA': 0, 'AG': 0, 'TG': 0, 'CG': 0, 'TC': 0, 'AC': 5, 'CT': 0} + assert len(true_answer) == len(double_runs) and all(double_runs[pair] == true_answer[pair] for pair in double_runs) + + +def test_repeats_1(): + p = Primer("ATCG") + assert ( + p.repeats(sequence="A", consecutive=False) == 1 and + p.repeats(sequence="AT", consecutive=False) == 1 and + p.repeats(sequence="AC", consecutive=False) == 0 and + p.repeats(sequence="A", consecutive=True) == 0 and + 
p.repeats(sequence="AT", consecutive=True) == 0 + ) + + +def test_repeats_2(): + p = Primer("AAAATCGTGT") + assert ( + p.repeats(sequence="AA", consecutive=True) == 2 and + p.repeats(sequence="GT", consecutive=True) == 2 + ) + + +def test_repeats_3(): + p = Primer("ATCGATCGATCG") + assert p.repeats(sequence="ATCG", consecutive=True) == 3