Skip to content

Commit

Permalink
Fix logic for handling missing r2 values in vcf files
Browse files Browse the repository at this point in the history
Reported-by: Zhijie Liao <[email protected]>
Reported-by: Lachlan Strike <[email protected]>
  • Loading branch information
HippocampusGirl committed Dec 19, 2024
1 parent 18a4bfa commit 46bf413
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
6 changes: 2 additions & 4 deletions src/gwas/src/gwas/utils/genetics.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,7 @@ def chromosomes_set() -> set[int | str]:

def greater_or_close(series: pd.Series, cutoff: float) -> npt.NDArray[np.bool_]:
    """Return an element-wise mask of the values that reach ``cutoff``.

    A value passes when it is ``>= cutoff`` or when ``np.isclose`` (with its
    default tolerances) considers it equal to ``cutoff``, so values that end
    up marginally below the cutoff through floating-point rounding still pass.

    Missing values (NaN) never pass: both ``nan >= cutoff`` and
    ``np.isclose(nan, cutoff)`` evaluate to ``False``. A caller that wants to
    keep missing values has to OR the result with ``np.isnan`` itself.

    Args:
        series: Values to compare against the cutoff.
        cutoff: Threshold that the values must reach.

    Returns:
        Boolean array with one entry per element of ``series``.
    """
    values = np.asarray(series.values)
    return np.logical_or(values >= cutoff, np.isclose(values, cutoff))


Expand All @@ -67,5 +65,5 @@ def make_variant_mask(
variant_mask = greater_or_close(
allele_frequencies,
minor_allele_frequency_cutoff,
) & greater_or_close(r_squared, r_squared_cutoff)
) & (greater_or_close(r_squared, r_squared_cutoff) | np.isnan(r_squared))
return variant_mask
22 changes: 22 additions & 0 deletions src/gwas/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import logging

import numpy as np
import pandas as pd
from pytest import FixtureRequest, LogCaptureFixture
from upath import UPath

Expand All @@ -9,6 +11,7 @@
setup_logging,
teardown_logging,
)
from gwas.utils.genetics import greater_or_close, make_variant_mask
from gwas.utils.multiprocessing import Process


Expand Down Expand Up @@ -40,3 +43,22 @@ def test_process(
assert "Hello, world!" in caplog.text
assert exception_queue.empty()
assert (tmp_path / "log.txt").is_file()


def test_greater_or_close() -> None:
    """Check ``greater_or_close`` for NaN and for values around the cutoff."""
    cutoff = 0.01
    # ``cutoff - 1e-32`` would round to exactly ``cutoff`` in float64 and so
    # would not exercise the tolerance branch; 1e-10 is representable as a
    # distinct value while still being within the default np.isclose
    # tolerance.
    just_below = cutoff - 1e-10
    assert just_below < cutoff

    values = pd.Series(
        [
            np.nan,  # missing values never pass
            0.0,  # clearly below the cutoff
            0.0099,  # below the cutoff and outside the tolerance
            just_below,  # strictly below, but close enough to pass
            cutoff,  # exactly at the cutoff
            0.1,  # clearly above the cutoff
        ]
    )
    expected = [False, False, False, True, True, True]
    assert np.array_equal(greater_or_close(values, cutoff), expected)


def test_make_variant_mask() -> None:
    """Check that a missing r-squared value does not exclude a variant.

    The allele-frequency filter must still reject NaN and too-rare variants,
    and a non-missing r-squared below the cutoff must still be rejected.
    """
    maf_cutoff = 0.01
    # ``maf_cutoff - 1e-32`` would round to exactly ``maf_cutoff`` in float64;
    # 1e-10 gives a value that is strictly below yet within tolerance.
    just_below = maf_cutoff - 1e-10
    assert just_below < maf_cutoff

    allele_frequencies = pd.Series(
        [np.nan, 0.0, just_below, maf_cutoff, 0.1, 0.1],
    )
    r_squared = pd.Series(
        [1.0, 1.0, 1.0, 1.0, np.nan, 0.5],
    )
    variant_mask = make_variant_mask(
        allele_frequencies,
        r_squared,
        minor_allele_frequency_cutoff=maf_cutoff,
        r_squared_cutoff=0.8,
    )
    # Second to last: r-squared is missing, so only the frequency decides.
    # Last: r-squared is present but below the cutoff, so it is rejected.
    expected = [False, False, True, True, True, False]
    assert np.array_equal(variant_mask, expected)

0 comments on commit 46bf413

Please sign in to comment.