Skip to content

Commit

Permalink
Fix unit tests, update recipes and fix upload_score script
Browse files Browse the repository at this point in the history
Use absolute paths when uploading to avoid conflicts
  • Loading branch information
HippocampusGirl committed Nov 17, 2024
1 parent f011a9a commit c70225f
Show file tree
Hide file tree
Showing 12 changed files with 148 additions and 92 deletions.
74 changes: 53 additions & 21 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -39,44 +39,74 @@ FROM conda AS builder
RUN conda install --yes "c-compiler" "conda-build"
COPY recipes/conda_build_config.yaml /root/conda_build_config.yaml

# Install a "retry" helper that re-runs a flaky command (such as a conda build
# that hits a transient network error) up to five times, sleeping 1, 10, 100
# and 1000 seconds between attempts. The heredoc delimiter is unquoted, so
# every dollar sign in the script body must be escaped to be written literally
# instead of being expanded while the file is created.
RUN cat <<EOF > "/usr/bin/retry"
#!/bin/bash
set -euo pipefail
timeout="1"
attempt="1"
until "\$@"; do
    # Remember the exit status of the failed command right away, because
    # the test below would overwrite it
    status="\$?"
    if [ "\${attempt}" -ge "5" ]; then
        # Out of attempts, so propagate the real failure to the caller
        exit "\${status}"
    fi
    sleep "\${timeout}"
    timeout=\$((timeout * 10))
    attempt=\$((attempt + 1))
done
EOF
RUN chmod "+x" "/usr/bin/retry"

# https://danieldk.eu/Posts/2020-08-31-MKL-Zen.html
RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" | \
gcc -x "c" -shared -fPIC -o "/opt/libfakeintel.so" -

FROM builder as dosage-convertor
FROM builder AS dosage-convertor
RUN --mount=source=recipes/dosage-convertor,target=/dosage-convertor \
conda build --no-anaconda-upload --numpy "2.0" "dosage-convertor"

FROM builder as ldsc
RUN --mount=source=recipes/ldsc,target=/ldsc \
conda build --no-anaconda-upload --numpy "2.0" --use-local "ldsc"
--mount=type=cache,target=/opt/conda/pkgs \
retry conda build --no-anaconda-upload --numpy "2.0" "dosage-convertor"

FROM builder as metal
FROM builder AS metal
RUN --mount=source=recipes/metal,target=/metal \
conda build --no-anaconda-upload --numpy "2.0" --use-local "metal"
--mount=type=cache,target=/opt/conda/pkgs \
retry conda build --no-anaconda-upload --numpy "2.0" --use-local "metal"

FROM builder as qctool
FROM builder AS pynose
RUN --mount=source=recipes/pynose,target=/pynose \
--mount=type=cache,target=/opt/conda/pkgs \
retry conda build --no-anaconda-upload --numpy "2.0" --use-local "pynose"

FROM builder AS qctool
RUN --mount=source=recipes/qctool,target=/qctool \
conda build --no-anaconda-upload --numpy "2.0" --use-local "qctool"
--mount=type=cache,target=/opt/conda/pkgs \
retry conda build --no-anaconda-upload --numpy "2.0" --use-local "qctool"

FROM builder as raremetal
FROM builder AS raremetal
RUN --mount=source=recipes/raremetal,target=/raremetal \
conda build --no-anaconda-upload --numpy "2.0" --use-local "raremetal"
--mount=type=cache,target=/opt/conda/pkgs \
retry conda build --no-anaconda-upload --numpy "2.0" --use-local "raremetal"

FROM builder as raremetal-debug
FROM builder AS raremetal-debug
RUN --mount=source=recipes/raremetal,target=/raremetal \
--mount=source=recipes/raremetal-debug,target=/raremetal-debug \
conda build --no-anaconda-upload --numpy "2.0" --use-local "raremetal-debug"
--mount=type=cache,target=/opt/conda/pkgs \
retry conda build --no-anaconda-upload --numpy "2.0" --use-local "raremetal-debug"

FROM builder as r-gmmat
FROM builder AS r-gmmat
RUN --mount=source=recipes/r-gmmat,target=/r-gmmat \
conda build --no-anaconda-upload --numpy "2.0" --use-local "r-gmmat"
--mount=type=cache,target=/opt/conda/pkgs \
retry conda build --no-anaconda-upload --numpy "2.0" --use-local "r-gmmat"

FROM builder as upload
FROM builder AS upload
RUN --mount=source=recipes/upload,target=/upload \
conda build --no-anaconda-upload --numpy "2.0" --use-local "upload"
--mount=type=cache,target=/opt/conda/pkgs \
retry conda build --no-anaconda-upload --numpy "2.0" --use-local "upload"

FROM builder AS ldsc
COPY --from=pynose /opt/conda/conda-bld /opt/conda/conda-bld
RUN --mount=source=recipes/ldsc,target=/ldsc \
--mount=type=cache,target=/opt/conda/pkgs \
retry conda build --no-anaconda-upload --numpy "2.0" --use-local "ldsc"

FROM builder as gwas
FROM builder AS gwas
COPY --from=dosage-convertor /opt/conda/conda-bld /opt/conda/conda-bld
COPY --from=ldsc /opt/conda/conda-bld /opt/conda/conda-bld
COPY --from=metal /opt/conda/conda-bld /opt/conda/conda-bld
Expand All @@ -90,14 +120,16 @@ RUN conda index /opt/conda/conda-bld
RUN --mount=source=recipes/gwas,target=/gwas-protocol/recipes/gwas \
--mount=source=src/gwas,target=/gwas-protocol/src/gwas \
--mount=source=.git,target=/gwas-protocol/.git \
conda build --no-anaconda-upload --numpy "2.0" --use-local "gwas-protocol/recipes/gwas"
--mount=type=cache,target=/opt/conda/pkgs \
retry conda build --no-anaconda-upload --numpy "2.0" --use-local "gwas-protocol/recipes/gwas"

# Install packages
# ================
FROM conda AS install

COPY --from=gwas /opt/conda/conda-bld /opt/conda/conda-bld
RUN conda install --yes --use-local \
RUN --mount=type=cache,target=/opt/conda/pkgs \
conda install --yes --use-local \
"parallel" \
"dosage-convertor" \
"qctool" \
Expand Down
6 changes: 2 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,15 @@ Next, start an interactive shell inside the container using one of the following

## Development

To create a local development environment install [Miniforge](https://github.com/conda-forge/miniforge) and create a `.condarc` file in your home directory with the following contents:
To create a local development environment, install [Micromamba](https://mamba.readthedocs.io/en/latest/installation/micromamba-installation.html) and create a `.condarc` file in your home directory with the following contents:

```
channels:
- conda-forge
- bioconda
```

Then update your `.bashrc` or `.zshrc` with `mamba init`. This will allow you to use the `conda` command.

Next, install `mamba` using `conda install mamba` and then create the environment using the following command:
Next, create the environment using the following command:

```bash
micromamba create --name "gwas-protocol" \
Expand Down
2 changes: 2 additions & 0 deletions recipes/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ gcc_version:
- 13.2.0
zstd:
- 1.5.5
libdeflate:
- 1.21
28 changes: 15 additions & 13 deletions recipes/ldsc/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,38 +1,40 @@
{% set name = "ldsc" %}
{% set version = "2.0.1" %}

package:
name: {{ name|lower }}
version: {{ version }}
name: ldsc
version: 2.0.2

source:
url: https://pypi.org/packages/source/{{ name[0] }}/{{ name }}/ldsc-{{ version }}.tar.gz
sha256: fe72f99da8a26414d82e47f2d2ee7cebbbab6c20d1b4ea51a0c38cc650c63556
url: "https://github.com/belowlab/ldsc/archive/refs/tags/2.0.2.tar.gz"
sha256: "f28c2fc77c05aa59f293e2666ba38a69610b8e153ff3b7f382dec53601bebc71"

build:
script: {{ PYTHON }} -m pip install . -vv --no-deps --no-build-isolation
noarch: python
script: |
nosetests --exclude="test_nextSNPs|test_nextSNPs_maf_ref|test_get_compression_gzip"
{{ PYTHON }} -m pip install . -vv --no-deps --no-build-isolation
number: 0

requirements:
host:
- python
- pip
- pynose
- bitarray >=2.6.0
- pybedtools >0.9.1
- scipy >=1.9.2
- numpy >=1.23.3
- pandas >=1.5.0
run:
- python
- bitarray >=2.6.0
- nose >=1.3.7
- pybedtools >=0.9.0
- pybedtools >0.9.1
- scipy >=1.9.2
- numpy >=1.23.3
- pandas >=1.5.0

test:
requires:
- nose
imports:
- ldscore.ldscore
commands:
- nosetests ldscore
- make_annot.py --help 2>&1 | grep usage
- munge_sumstats.py --help 2>&1 | grep usage
- ldsc.py --help 2>&1 | grep usage
Expand Down
43 changes: 43 additions & 0 deletions recipes/pynose/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Conda recipe for pynose. The ldsc recipe lists pynose as a host requirement
# and calls its nosetests command in the build script.
{% set name = "pynose" %}
{% set version = "1.5.3" %}

package:
name: {{ name|lower }}
version: {{ version }}

# Source distribution downloaded from PyPI and verified against the checksum
source:
url: https://pypi.org/packages/source/{{ name[0] }}/{{ name }}/pynose-{{ version }}.tar.gz
sha256: 1b00ab94447cd7fcbb0a344fc1435137404c043db4a8e3cda63ca2893f8e5903

build:
# Both command names map to the same entry function
entry_points:
- nosetests = nose.core:run_exit
- pynose = nose.core:run_exit
noarch: python
# --no-deps and --no-build-isolation stop pip from fetching anything itself
script: {{ PYTHON }} -m pip install . -vv --no-deps --no-build-isolation
number: 0

requirements:
host:
- python >=3.7
- setuptools >=68.0.0
- wheel >=0.42.0
- pip
run:
- python >=3.7

# Smoke test: the module imports and both entry points start
test:
imports:
- nose
commands:
- nosetests --help
- pynose --help

about:
home: https://github.com/mdmintz/pynose
summary: pynose fixes nose to extend unittest and make testing easier
dev_url: https://github.com/mdmintz/pynose
license: LGPL-2.1 AND EPL-2.0
license_file:
- LICENSE
- LICENSE.cpython
6 changes: 3 additions & 3 deletions recipes/upload/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@

package:
name: upload
version: 0.5.8
version: 0.5.10

source:
url: "https://github.com/hippocampusgirl/upload/archive/v0.5.8.tar.gz"
sha256: dd6b818fb6ba4c91e387e22ddcd30dfac3d827bd87e8354d8a39c661ce387948
url: "https://github.com/hippocampusgirl/upload/archive/v0.5.10.tar.gz"
sha256: "29a097cb8bdccf9ef672e8ba160a12650aabf40f9f9c3f56b32ab5b8b81599cd"

build:
noarch: generic
Expand Down
6 changes: 5 additions & 1 deletion src/gwas/src/gwas/compression/pipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,11 @@ def close(
f'Decompression failed with code {returncode} and message "{data}"'
)
elif data:
logger.warning(f'Decompression received stderr: "{data}"')
message = f'Decompression received stderr: "{data}"'
if "premature end" in data:
raise ValueError(message)
else:
logger.warning(message)

self.process_handle = None
self.input_file_handle = None
Expand Down
3 changes: 2 additions & 1 deletion src/gwas/src/gwas/meta/worker/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def worker(job: Job, output_directory: UPath, arguments: Namespace) -> None:
cache_path.mkdir(parents=True, exist_ok=True)

tags = dict(parse(job.name))
for key in ["population", "age", "feature"]:
output_directory = output_directory / "worker-outputs"
for key in ["population", "age", "feature", "taskcontrast"]:
if key not in tags:
continue
output_directory = output_directory / f"{key}-{tags[key]}"
Expand Down
12 changes: 6 additions & 6 deletions src/gwas/src/gwas/meta/worker/ldsc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from dataclasses import dataclass
from subprocess import PIPE, STDOUT, run
from tempfile import TemporaryDirectory
from typing import Literal

import polars as pl
from upath import UPath
Expand Down Expand Up @@ -115,7 +114,7 @@ class LDSCOutput:
ldsc_mean_chisq: float
ldsc_lambda_gc: float
ldsc_intercept: ValueSE
ldsc_ratio: ValueSE | Literal["< 0 (usually indicates GC correction)"]
ldsc_ratio: ValueSE | str
ldsc_total_observed_scale_h2: ValueSE


Expand Down Expand Up @@ -147,10 +146,11 @@ def parse_value_se(value_se: str) -> ValueSE:

ldsc_intercept = parse_value_se(get_value("Intercept", ldsc_log))
ldsc_ratio_str = get_value("Ratio", ldsc_log)
if ldsc_ratio_str == "< 0 (usually indicates GC correction)":
ldsc_ratio: ValueSE | Literal["< 0 (usually indicates GC correction)"] = (
"< 0 (usually indicates GC correction)"
)
if ldsc_ratio_str in {
"< 0 (usually indicates GC correction)",
"NA (mean chi^2 < 1)",
}:
ldsc_ratio: ValueSE | str = ldsc_ratio_str
else:
ldsc_ratio = parse_value_se(ldsc_ratio_str)
ldsc_total_observed_scale_h2 = parse_value_se(
Expand Down
6 changes: 5 additions & 1 deletion src/gwas/src/gwas/tri/tsqr.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@
def scale(vcf_file: VCFFile, b: npt.NDArray[np.float64]) -> None:
# calculate variant properties
mean = b.mean(axis=1)
standard_deviation = b.std(axis=1)
minor_allele_frequency = mean / 2

standard_deviation = np.sqrt(
2 * minor_allele_frequency * (1 - minor_allele_frequency)
)

close_to_zero = np.isclose(standard_deviation, 0)
if close_to_zero.any():
Expand Down
Loading

0 comments on commit c70225f

Please sign in to comment.