Skip to content

Commit

Permalink
Add upload-score command
Browse files Browse the repository at this point in the history
  • Loading branch information
HippocampusGirl committed Mar 26, 2024
1 parent 193bb4a commit deb3026
Show file tree
Hide file tree
Showing 10 changed files with 166 additions and 7 deletions.
10 changes: 6 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ FROM base as conda

RUN curl --silent --show-error --location \
"https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" \
--output "conda.sh" && \
--output "conda.sh" && \
bash conda.sh -b -p /opt/conda && \
rm conda.sh && \
conda config --system --append channels "bioconda" && \
Expand Down Expand Up @@ -70,12 +70,16 @@ RUN --mount=source=recipes/r-saige,target=/r-saige \
conda mambabuild --no-anaconda-upload "r-saige" && \
conda build purge

RUN --mount=source=recipes/upload,target=/upload \
conda mambabuild --no-anaconda-upload "upload" && \
conda build purge

# mount .git folder too for setuptools_scm
RUN --mount=source=recipes/gwas,target=/gwas-protocol/recipes/gwas \
--mount=source=src/gwas,target=/gwas-protocol/src/gwas \
--mount=source=.git,target=/gwas-protocol/.git \
cd gwas-protocol/recipes && \
conda mambabuild --no-anaconda-upload "gwas" && \
conda mambabuild --no-anaconda-upload --use-local "gwas" && \
conda build purge

RUN conda index /opt/conda/conda-bld
Expand All @@ -94,7 +98,6 @@ RUN mamba install --yes --use-local \
"plink2" \
"r-skat" \
"tabix" \
"bzip2" \
"p7zip>=15.09" \
"parallel" \
"bolt-lmm" \
Expand All @@ -114,7 +117,6 @@ RUN mamba install --yes --use-local \
find /opt/conda/ -follow -type f -name "*.a" -delete && \
sync


# Final
# =====
FROM base
Expand Down
2 changes: 2 additions & 0 deletions recipes/gwas/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ requirements:
- xz
- bzip2
- lz4-c
# Transfer results
- upload

test:
commands:
Expand Down
19 changes: 19 additions & 0 deletions recipes/upload/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
# Build script for the "upload" recipe: builds the npm package from the
# unpacked source tree and installs the packed tarball globally.

# Trace every command and abort on the first failure
set -x -e

# Swap the node binary in the host prefix for a symlink to the one in the
# build prefix, so that the build-prefix nodejs is the one being used
host_node="${PREFIX}/bin/node"
rm "${host_node}"
ln -s "${BUILD_PREFIX}/bin/node" "${host_node}"

# Build gyp packages from source instead of fetching pre-built ones
export npm_config_build_from_source=true

# Point npm at a user configuration file that does not exist, so that any
# custom configuration on the build machine is ignored
export NPM_CONFIG_USERCONFIG=/tmp/nonexistentrc

# Install dependencies and run the package's build script
npm install
npm run build

# Pack the package and install the tarball named by `npm pack`
tarball=$(npm pack)
npm install --global "${tarball}"
33 changes: 33 additions & 0 deletions recipes/upload/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# conda recipe for the "upload" command-line tool.
# The package name and version below are interpolated into the source URL.
{% set name = "upload" %}
{% set version = "0.4.1" %}

package:
name: {{ name }}
version: {{ version }}

# Source archive of the tagged release on GitHub, pinned by its SHA-256 checksum.
# Bumping `version` above requires updating `sha256` to match the new archive.
source:
url: https://github.com/hippocampusgirl/{{ name }}/archive/v{{ version }}.tar.gz
sha256: 395f336fa2dc913e84899f1f1fee8a2f57b650f4b6536c079ddba8ca48af7e62

# NOTE(review): `noarch: generic` declares the package platform-independent,
# yet C and C++ compilers are listed as build requirements below. If the build
# compiles any native code, the result is platform-specific and `noarch`
# should be dropped - confirm.
build:
noarch: generic

# nodejs is needed at build time, in the host prefix, and at run time.
requirements:
build:
- {{ compiler('c') }}
- {{ compiler('cxx') }}
- nodejs
- make
host:
- nodejs
run:
- nodejs

# Smoke test: the installed `upload` executable must be able to print its help.
test:
requires:
- nodejs
commands:
- upload --help

about:
home: https://github.com/hippocampusgirl/upload
1 change: 1 addition & 0 deletions src/gwas/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ score = "gwas.score.cli:main"
stratify = "gwas.stratify.cli:main"
transpose = "gwas.transpose.cli:main"
tri = "gwas.tri.cli:main"
upload-score = "gwas.upload_score.cli:main"

[build-system]
requires = [
Expand Down
7 changes: 5 additions & 2 deletions src/gwas/src/gwas/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def _showwarning(message, category, filename, lineno, file=None, line=None):
)


def setup_logging(level: str | int, log_path: Path) -> None:
def setup_logging(level: str | int, log_path: Path | None = None) -> None:
root = logging.getLogger()
root.setLevel(level)

Expand All @@ -31,8 +31,11 @@ def setup_logging(level: str | int, log_path: Path) -> None:

handlers: list[logging.Handler] = [
logging.StreamHandler(),
logging.FileHandler(log_path / "log.txt", "a", errors="backslashreplace"),
]
if log_path is not None:
handlers.append(
logging.FileHandler(log_path / "log.txt", "a", errors="backslashreplace")
)
for handler in handlers:
handler.setFormatter(formatter)
root.addHandler(handler)
Expand Down
5 changes: 4 additions & 1 deletion src/gwas/src/gwas/mem/wkspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from itertools import pairwise
from mmap import MAP_SHARED, mmap
from multiprocessing import reduction as mp_reduction
from pprint import pformat
from typing import Callable

import numpy as np
Expand Down Expand Up @@ -87,7 +88,9 @@ def alloc(self, name: str, *shape: int, dtype: str = "f8"):
# check for overflow
end = start + size - 1
if end >= self.size:
raise MemoryError
raise MemoryError(
f"No space left in shared memory buffer: {pformat(allocations)}"
)

allocations[name] = Allocation(start, size, shape, dtype)

Expand Down
2 changes: 2 additions & 0 deletions src/gwas/src/gwas/score/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def split_by_missing_values(
Callable[[int | str, VariableCollection], Eigendecomposition] | None
) = None,
) -> list[VariableCollection]:
sw = base_variable_collection.sw
# Load phenotype and covariate data for all samples that have genetic data and
# count missing values.
phenotype_names = base_variable_collection.phenotype_names
Expand Down Expand Up @@ -157,6 +158,7 @@ def split_by_missing_values(
)

variable_collections.append(variable_collection)
sw.squash()

# Sort by number of phenotypes
variable_collections.sort(key=lambda vc: -vc.phenotype_count)
Expand Down
Empty file.
94 changes: 94 additions & 0 deletions src/gwas/src/gwas/upload_score/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# -*- coding: utf-8 -*-
import logging
import sys
from argparse import ArgumentParser, Namespace
from multiprocessing import cpu_count
from pathlib import Path
from subprocess import call
from tempfile import TemporaryDirectory

from ..log import logger, setup_logging
from ..utils import unwrap_which

# Location of the `upload` executable, looked up once when this module is
# imported.
# NOTE(review): presumably `unwrap_which` raises when the executable cannot be
# found, which would make importing this module fail - confirm in `..utils`.
upload_executable = unwrap_which("upload")

# Glob patterns for the file names that are selected for upload. `upload()`
# prefixes each pattern with "**/" so that it matches at any depth below an
# input directory.
path_patterns: list[str] = [
    "chr*.metadata.yaml.gz",
    "chr*.score.b2array",
    "chr*.score.axis-metadata.pkl.zst",
    "covariance.b2array",
    "covariance.axis-metadata.pkl.zst",
    "populations.pdf",
    "*stat-*_statmap.nii.gz",
    "*stat-*_statmap.json",
    "*mask.nii.gz",
    "*mean.nii.gz",
    "*std.nii.gz",
    "*contrast_matrix.tsv",
]


def upload(arguments: Namespace) -> None:
    """Collect result files and pass them to the `upload` executable.

    Every directory in `arguments.input_directory` is searched recursively for
    files matching `path_patterns`. Each match is symlinked by its file name
    into a temporary directory, and `upload upload-client` is then run with
    that directory as its working directory and the file names as `--path`
    values.

    Args:
        arguments: Parsed command line arguments. Reads `input_directory`,
            `token`, `endpoint` and `log_level`.

    Raises:
        FileExistsError: If two different files share the same file name and
            therefore cannot both be linked into the flat temporary directory.
        CalledProcessError: If the upload command exits with a non-zero status.
    """
    # Imported locally because the module only imports `call` from subprocess
    from subprocess import CalledProcessError

    # Map each file name to the file it refers to. A file that is matched more
    # than once (overlapping patterns or input directories) is kept only once
    # instead of failing later when the symlink is created a second time.
    upload_paths: dict[str, Path] = dict()
    for input_directory_str in arguments.input_directory:
        input_directory = Path(input_directory_str).absolute()
        for path_pattern in path_patterns:
            for upload_path in input_directory.glob(f"**/{path_pattern}"):
                known_path = upload_paths.setdefault(upload_path.name, upload_path)
                if known_path == upload_path:
                    continue
                if known_path.resolve() == upload_path.resolve():
                    continue
                raise FileExistsError(
                    f'Cannot upload both "{known_path}" and "{upload_path}" '
                    "because they have the same file name"
                )

    if not upload_paths:
        # Do not invoke the upload command with an empty `--path` list
        logger.warning("No files found to upload")
        return

    with TemporaryDirectory() as tmp_path_str:
        tmp_path = Path(tmp_path_str)
        for name, upload_path in upload_paths.items():
            (tmp_path / name).symlink_to(upload_path)

        logger.info(f"Uploading {len(upload_paths)} files")

        command_prefix: list[str] = [upload_executable, "upload-client", "--token"]
        command_suffix: list[str] = [
            "--endpoint",
            arguments.endpoint,
            "--path",
            *sorted(upload_paths),
        ]
        if arguments.log_level == "DEBUG":
            command_suffix.append("--debug")
        command: list[str] = [*command_prefix, arguments.token, *command_suffix]

        # The token is a secret, so it is replaced with a placeholder in
        # everything that ends up in log output
        redacted_command: list[str] = [
            *command_prefix,
            "<redacted>",
            *command_suffix,
        ]
        logger.debug(f"Running command: {' '.join(redacted_command)}")

        # Report a failed upload instead of silently ignoring the exit status
        returncode = call(command, cwd=tmp_path)
        if returncode != 0:
            raise CalledProcessError(returncode, redacted_command)


def parse_arguments(argv: list[str]) -> Namespace:
argument_parser = ArgumentParser()
argument_parser.add_argument("--input-directory", nargs="+", required=True)

argument_parser.add_argument("--token", required=True)
argument_parser.add_argument("--endpoint", default="https://upload.gwas.science")

# Program options
argument_parser.add_argument(
"--log-level", choices=logging.getLevelNamesMapping().keys(), default="INFO"
)
argument_parser.add_argument("--debug", action="store_true", default=False)
argument_parser.add_argument("--num-threads", type=int, default=cpu_count())

return argument_parser.parse_args(argv)


def main() -> None:
    """Entry point of the upload-score command.

    Parses `sys.argv`, configures logging without a log file and runs the
    upload. Any exception is logged with its traceback; with `--debug` a
    post-mortem debugger session is started afterwards. The process then
    exits with status 1 so that callers can detect the failure.
    """
    arguments = parse_arguments(sys.argv[1:])
    setup_logging(level=arguments.log_level)

    try:
        upload(arguments)
    except Exception as e:
        # `logger.exception` already attaches the traceback
        logger.exception("Exception: %s", e)
        if arguments.debug:
            import pdb

            pdb.post_mortem()
        # Signal the failure through the exit status instead of returning
        # normally, which would make the process exit with status 0
        sys.exit(1)

0 comments on commit deb3026

Please sign in to comment.