From c31205860c2b2ac2ebb568d514d2fb192e7ade83 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 17 Oct 2024 16:05:07 -0500 Subject: [PATCH] loosen pytorch pin, remove unnecessary stuff in test_python.sh --- ci/test_python.sh | 186 ++++-------------- .../all_cuda-118_arch-x86_64.yaml | 2 +- .../all_cuda-121_arch-x86_64.yaml | 2 +- .../all_cuda-124_arch-x86_64.yaml | 2 +- dependencies.yaml | 2 +- .../conda/cugraph_dgl_dev_cuda-118.yaml | 2 +- .../conda/cugraph_pyg_dev_cuda-118.yaml | 2 +- 7 files changed, 44 insertions(+), 154 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index d9f64e4..a9398c0 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -8,19 +8,14 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ . /opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION="$(rapids-version)" + rapids-logger "Generate Python testing dependencies" rapids-dependency-file-generator \ --output conda \ --file-key test_python \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml -rapids-mamba-retry env create --yes -f env.yaml -n test - -# Temporarily allow unbound variables for conda activation. 
-set +u -conda activate test -set -u - rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) @@ -29,153 +24,48 @@ RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" -rapids-print-env - -rapids-mamba-retry install \ - --channel "${CPP_CHANNEL}" \ - --channel "${PYTHON_CHANNEL}" \ - libcugraph \ - pylibcugraph \ - cugraph \ - nx-cugraph \ - cugraph-service-server \ - cugraph-service-client - -rapids-logger "Check GPU usage" -nvidia-smi - -# RAPIDS_DATASET_ROOT_DIR is used by test scripts -export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" -pushd "${RAPIDS_DATASET_ROOT_DIR}" -./get_test_data.sh --benchmark -popd - EXITCODE=0 trap "EXITCODE=1" ERR set +e - -# Test runs that include tests that use dask require -# --import-mode=append. Those tests start a LocalCUDACluster that inherits -# changes from pytest's modifications to PYTHONPATH (which defaults to -# prepending source tree paths to PYTHONPATH). This causes the -# LocalCUDACluster subprocess to import cugraph from the source tree instead of -# the install location, and in most cases, the source tree does not have -# extensions built in-place and will result in ImportErrors. 
-# -# FIXME: TEMPORARILY disable MG PropertyGraph tests (experimental) tests and -# bulk sampler IO tests (hangs in CI) -if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then - if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then - # we are only testing in a single cuda version - # because of pytorch and rapids compatibilty problems - rapids-mamba-retry env create --yes -f env.yaml -n test_cugraph_dgl - - # activate test_cugraph_dgl environment for dgl - set +u - conda activate test_cugraph_dgl - set -u - rapids-mamba-retry install \ - --channel "${CPP_CHANNEL}" \ - --channel "${PYTHON_CHANNEL}" \ - --channel conda-forge \ - --channel dglteam/label/cu118 \ - --channel nvidia \ - libcugraph \ - pylibcugraph \ - pylibcugraphops \ - cugraph \ - cugraph-dgl \ - 'dgl>=1.1.0.cu*,<=2.0.0.cu*' \ - 'pytorch>=2.0' \ - 'cuda-version=11.8' - - rapids-print-env - - rapids-logger "pytest cugraph_dgl (single GPU)" - ./ci/run_cugraph_dgl_pytests.sh \ - --junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph-dgl.xml" \ - --cov-config=../../.coveragerc \ - --cov=cugraph_dgl \ - --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-dgl-coverage.xml" \ - --cov-report=term - - # Reactivate the test environment back - set +u - conda deactivate - conda activate test - set -u - else - rapids-logger "skipping cugraph_dgl pytest on ARM64" - fi -else - rapids-logger "skipping cugraph_dgl pytest on CUDA!=11.8" -fi - -if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then - if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then - rapids-mamba-retry env create --yes -f env.yaml -n test_cugraph_pyg - - # Temporarily allow unbound variables for conda activation. 
- set +u - conda activate test_cugraph_pyg - set -u - - # TODO re-enable logic once CUDA 12 is testable - #if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then - CONDA_CUDA_VERSION="11.8" - PYG_URL="https://data.pyg.org/whl/torch-2.1.0+cu118.html" - #else - # CONDA_CUDA_VERSION="12.1" - # PYG_URL="https://data.pyg.org/whl/torch-2.1.0+cu121.html" - #fi - - # Will automatically install built dependencies of cuGraph-PyG - rapids-mamba-retry install \ - --channel "${CPP_CHANNEL}" \ - --channel "${PYTHON_CHANNEL}" \ - --channel pytorch \ - --channel pyg \ - --channel nvidia \ - "cugraph-pyg" \ - "pytorch=2.1.0" \ - "pytorch-cuda=${CONDA_CUDA_VERSION}" - - # Install pyg dependencies (which requires pip) - - pip install \ - ogb \ - tensordict - - pip install \ - pyg_lib \ - torch_scatter \ - torch_sparse \ - -f ${PYG_URL} - - rapids-print-env - - rapids-logger "pytest cugraph_pyg (single GPU)" - # rmat is not tested because of multi-GPU testing - ./ci/run_cugraph_pyg_pytests.sh \ - --junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph-pyg.xml" \ - --cov-config=../../.coveragerc \ - --cov=cugraph_pyg \ - --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-pyg-coverage.xml" \ - --cov-report=term - - # Reactivate the test environment back - set +u - conda deactivate - conda activate test - set -u - else - rapids-logger "skipping cugraph_pyg pytest on ARM64" - fi +if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then + rapids-mamba-retry env create --yes -f env.yaml -n test_pylibwholegraph + + # Temporarily allow unbound variables for conda activation. 
+ set +u + conda activate test_pylibwholegraph + set -u + + # Will automatically install built dependencies of pylibwholegraph + rapids-mamba-retry install \ + --channel "${CPP_CHANNEL}" \ + --channel "${PYTHON_CHANNEL}" \ + --channel pytorch \ + 'mkl<2024.1.0' \ + "pylibwholegraph=${RAPIDS_VERSION}" \ + 'pytorch::pytorch>=2.3,<2.4' \ + 'ogb' + + rapids-print-env + + rapids-logger "Check GPU usage" + nvidia-smi + + rapids-logger "pytest pylibwholegraph (single GPU)" + ./ci/run_pylibwholegraph_pytests.sh \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-pylibwholegraph.xml" \ + --cov-config=../../.coveragerc \ + --cov=pylibwholegraph \ + --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/pylibwholegraph-coverage.xml" \ + --cov-report=term + + # Leave the test_pylibwholegraph environment (unbound variables allowed during deactivation) + set +u + conda deactivate + set -u else - rapids-logger "skipping cugraph_pyg pytest on CUDA!=11.8" + rapids-logger "skipping pylibwholegraph pytest on ARM64" fi - rapids-logger "Test script exiting with value: $EXITCODE" exit ${EXITCODE} diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 6823818..017ef24 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -43,7 +43,7 @@ dependencies: - pytest-forked - pytest-xdist - pytorch-cuda=11.8 -- pytorch::pytorch>=2.0,<2.2.0a0 +- pytorch::pytorch>=2.0,<2.4.0a0 - raft-dask==24.12.*,>=0.0.0a0 - recommonmark - rmm==24.12.*,>=0.0.0a0 diff --git a/conda/environments/all_cuda-121_arch-x86_64.yaml b/conda/environments/all_cuda-121_arch-x86_64.yaml index 9254bd9..5c6bfcf 100644 --- a/conda/environments/all_cuda-121_arch-x86_64.yaml +++ b/conda/environments/all_cuda-121_arch-x86_64.yaml @@ -48,7 +48,7 @@ dependencies: - pytest-forked - pytest-xdist - pytorch-cuda=12.1 -- pytorch::pytorch>=2.0,<2.2.0a0 +- pytorch::pytorch>=2.0,<2.4.0a0 - raft-dask==24.12.*,>=0.0.0a0 - recommonmark - rmm==24.12.*,>=0.0.0a0 diff --git 
a/conda/environments/all_cuda-124_arch-x86_64.yaml b/conda/environments/all_cuda-124_arch-x86_64.yaml index b900c90..93f7fb1 100644 --- a/conda/environments/all_cuda-124_arch-x86_64.yaml +++ b/conda/environments/all_cuda-124_arch-x86_64.yaml @@ -48,7 +48,7 @@ dependencies: - pytest-forked - pytest-xdist - pytorch-cuda=12.1 -- pytorch::pytorch>=2.0,<2.2.0a0 +- pytorch::pytorch>=2.0,<2.4.0a0 - raft-dask==24.12.*,>=0.0.0a0 - recommonmark - rmm==24.12.*,>=0.0.0a0 diff --git a/dependencies.yaml b/dependencies.yaml index 72b7239..7c6acc7 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -388,7 +388,7 @@ dependencies: common: - output_types: [conda] packages: - - pytorch::pytorch>=2.0,<2.2.0a0 + - pytorch::pytorch>=2.0,<2.4.0a0 - torchdata - pydantic specific: diff --git a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml index eef8327..8b9f0c0 100644 --- a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml +++ b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml @@ -22,7 +22,7 @@ dependencies: - pytest-xdist - pytorch-cuda=11.8 - pytorch::pytorch>=2.0 -- pytorch::pytorch>=2.0,<2.2.0a0 +- pytorch::pytorch>=2.0,<2.4.0a0 - scipy - tensordict>=0.1.2 - torchdata diff --git a/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml index 9f85b2d..1b8aa9c 100644 --- a/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml +++ b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml @@ -21,7 +21,7 @@ dependencies: - pytest-xdist - pytorch-cuda=11.8 - pytorch::pytorch>=2.0 -- pytorch::pytorch>=2.0,<2.2.0a0 +- pytorch::pytorch>=2.0,<2.4.0a0 - scipy - tensordict>=0.1.2 - torchdata