Skip to content

Commit

Permalink
[Images] Prebake cuda image (mlrun#2813)
Browse files Browse the repository at this point in the history
  • Loading branch information
liranbg authored Dec 26, 2022
1 parent 0ac835a commit 9c47d18
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 70 deletions.
21 changes: 17 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -303,19 +303,32 @@ models-gpu: update-version-file ## Build models-gpu docker image
$(MLRUN_MODELS_GPU_CACHE_IMAGE_PULL_COMMAND)
docker build \
--file dockerfiles/models-gpu/Dockerfile \
--build-arg MLRUN_PYTHON_VERSION=$(MLRUN_PYTHON_VERSION) \
--build-arg MLRUN_PIP_VERSION=$(MLRUN_PIP_VERSION) \
--build-arg CUDA_VER=$(MLRUN_CUDA_VERSION) \
--build-arg HOROVOD_VERSION=$(MLRUN_HOROVOD_VERSION) \
$(MLRUN_MODELS_GPU_IMAGE_DOCKER_CACHE_FROM_FLAG) \
$(MLRUN_DOCKER_NO_CACHE_FLAG) \
--tag $(MLRUN_MODELS_GPU_IMAGE_NAME_TAGGED) .
--tag $(MLRUN_MODELS_GPU_IMAGE_NAME_TAGGED) \
.

.PHONY: push-models-gpu
push-models-gpu: models-gpu ## Push models gpu docker image
docker push $(MLRUN_MODELS_GPU_IMAGE_NAME_TAGGED)
$(MLRUN_MODELS_GPU_CACHE_IMAGE_PUSH_COMMAND)

.PHONY: prebake-models-gpu
prebake-models-gpu: ## Build prebake models GPU docker image
docker build \
--file dockerfiles/models-gpu/prebaked.Dockerfile \
--build-arg CUDA_VER=$(MLRUN_CUDA_VERSION) \
--build-arg MLRUN_PYTHON_VERSION=$(MLRUN_PYTHON_VERSION) \
--build-arg MLRUN_PIP_VERSION=$(MLRUN_PIP_VERSION) \
--build-arg HOROVOD_VERSION=$(MLRUN_HOROVOD_VERSION) \
--tag quay.io/mlrun/prebaked-cuda:$(MLRUN_CUDA_VERSION)-base-ubuntu20.04 \
.

.PHONY: push-prebake-models-gpu
push-prebake-models-gpu: ## Push prebake models GPU docker image
docker push quay.io/mlrun/prebaked-cuda:$(MLRUN_CUDA_VERSION)-base-ubuntu20.04


MLRUN_JUPYTER_IMAGE_NAME := $(MLRUN_DOCKER_IMAGE_PREFIX)/jupyter$(MLRUN_PYTHON_VERSION_SUFFIX):$(MLRUN_DOCKER_TAG)
DEFAULT_IMAGES += $(MLRUN_JUPYTER_IMAGE_NAME)
Expand Down
67 changes: 1 addition & 66 deletions dockerfiles/models-gpu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,72 +13,7 @@
# limitations under the License.
ARG CUDA_VER=11.7.0

FROM quay.io/mlrun/cuda:${CUDA_VER}-base-ubuntu20.04

# need to be redeclared since used in the from
ARG CUDA_VER

ENV PIP_NO_CACHE_DIR=1

ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
ENV PATH /opt/conda/bin:$PATH

# Set default shell to /bin/bash
SHELL ["/bin/bash", "-cu"]

RUN apt-get update && \
apt-get upgrade -y && \
DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get install -y --no-install-recommends \
gcc \
cmake \
curl \
git-core \
graphviz \
wget && \
rm -rf /var/lib/apt/lists/*

RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh -O ~/installconda.sh && \
/bin/bash ~/installconda.sh -f -b -p /opt/conda && \
rm ~/installconda.sh && \
conda init bash

ARG MLRUN_PIP_VERSION=22.3.0
ARG MLRUN_PYTHON_VERSION=3.9.13
ARG OMPI_VERSION=4.1.4

ENV CONDA_OVERRIDE_CUDA ${CUDA_VER}

RUN conda config --add channels conda-forge && \
conda update -n base -c defaults conda && \
conda install -n base \
python=${MLRUN_PYTHON_VERSION} \
pip~=${MLRUN_PIP_VERSION} \
&& conda clean -aqy

RUN conda install -n base -c rapidsai -c nvidia -c pytorch -c conda-forge \
cmake \
cudatoolkit=${CUDA_VER} \
cudnn \
cxx-compiler=1.5.1 \
cython \
make \
nccl \
python=${MLRUN_PYTHON_VERSION} \
pytorch=1.13 \
rapids=22.10 \
tensorflow=2.9 \
torchvision=0.14 \
openmpi-mpicc=${OMPI_VERSION} \
&& conda clean -aqy

ENV OMPI_ALLOW_RUN_AS_ROOT=1
ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1

ARG HOROVOD_VERSION=0.25.0

RUN HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_WITH_MPI=1 HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 \
python -m pip install horovod~=${HOROVOD_VERSION} && \
horovodrun --check-build
FROM quay.io/mlrun/prebaked-cuda:${CUDA_VER}-base-ubuntu20.04

WORKDIR /mlrun

Expand Down
81 changes: 81 additions & 0 deletions dockerfiles/models-gpu/prebaked.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Copyright 2020 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG CUDA_VER=11.7.0

FROM quay.io/mlrun/cuda:${CUDA_VER}-base-ubuntu20.04

# need to be redeclared since used in the from
ARG CUDA_VER

ENV PIP_NO_CACHE_DIR=1

ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
ENV PATH /opt/conda/bin:$PATH

# Set default shell to /bin/bash
SHELL ["/bin/bash", "-cu"]

RUN apt-get update && \
apt-get upgrade -y && \
DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get install -y --no-install-recommends \
gcc \
cmake \
curl \
git-core \
graphviz \
wget && \
rm -rf /var/lib/apt/lists/*

RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh -O ~/installconda.sh && \
/bin/bash ~/installconda.sh -f -b -p /opt/conda && \
rm ~/installconda.sh && \
conda init bash

ARG MLRUN_PIP_VERSION=22.3.0
ARG MLRUN_PYTHON_VERSION=3.9.13
ARG OMPI_VERSION=4.1.4

ENV CONDA_OVERRIDE_CUDA ${CUDA_VER}

RUN conda config --add channels conda-forge && \
conda update -n base -c defaults conda && \
conda install -n base \
python=${MLRUN_PYTHON_VERSION} \
pip~=${MLRUN_PIP_VERSION} \
&& conda clean -aqy

RUN conda install -n base -c rapidsai -c nvidia -c pytorch -c conda-forge \
cmake \
cudatoolkit=${CUDA_VER} \
cudnn \
cxx-compiler=1.5.1 \
cython \
make \
nccl \
python=${MLRUN_PYTHON_VERSION} \
pytorch=1.13 \
rapids=22.10 \
tensorflow=2.9 \
torchvision=0.14 \
openmpi-mpicc=${OMPI_VERSION} \
&& conda clean -aqy

ENV OMPI_ALLOW_RUN_AS_ROOT=1
ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1

ARG HOROVOD_VERSION=0.25.0

RUN HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_WITH_MPI=1 HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 \
python -m pip install horovod~=${HOROVOD_VERSION} && \
horovodrun --check-build

0 comments on commit 9c47d18

Please sign in to comment.