First steps in developing MoE incremental decoding #1551

Merged
4 changes: 4 additions & 0 deletions .gitignore
@@ -196,3 +196,7 @@ tests/inference/python_test_configs/*.json

core.*
fine_grained_alignment_config.json

# CLion
.idea/
cmake-build-debug
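
These entries keep CLion's project metadata directory and its default build output directory out of version control. A quick way to confirm the new patterns match as intended (the file paths below are hypothetical examples, not files in the repo):

git check-ignore -v .idea/workspace.xml cmake-build-debug/CMakeCache.txt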
2 changes: 1 addition & 1 deletion config/config.linux
@@ -13,7 +13,7 @@
INSTALL_DIR=${INSTALL_DIR:-}

# set build type
-BUILD_TYPE=${BUILD_TYPE:-Release}
+BUILD_TYPE=${BUILD_TYPE:-Debug}

INFERENCE_TESTS=${INFERENCE_TESTS:-OFF}
LIBTORCH_PATH=${LIBTORCH_PATH:-"$(realpath ../..)/libtorch"}
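
With this change the default build type becomes Debug, so an optimized build now requires overriding the variable explicitly. A minimal sketch, assuming config.linux is invoked from the repository root (adjust the relative path if you configure from a separate build directory):

# Hypothetical invocation restoring an optimized build despite the new Debug default
BUILD_TYPE=Release ./config/config.linux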
135 changes: 135 additions & 0 deletions docker/run-bind.sh
@@ -0,0 +1,135 @@
#! /usr/bin/env bash
set -euo pipefail

# Usage: ./run-bind.sh <docker_image_name>
# Optional environment variables: FF_GPU_BACKEND, cuda_version, hip_version, ATTACH_GPUS, SHM_SIZE

# Cd into directory holding this script
cd "${BASH_SOURCE[0]%/*}"

# Parse input params
image=${1:-flexflow}
FF_GPU_BACKEND=${FF_GPU_BACKEND:-cuda}
cuda_version=${cuda_version:-"empty"}
hip_version=${hip_version:-"empty"}

# Parameter controlling whether to attach GPUs to the Docker container
ATTACH_GPUS=${ATTACH_GPUS:-true}
gpu_arg=""
if $ATTACH_GPUS ; then gpu_arg="--gpus all" ; fi
FORWARD_STREAMLIT_PORT=${FORWARD_STREAMLIT_PORT:-true}
port_forward_arg=""
if $FORWARD_STREAMLIT_PORT ; then
port_forward_arg+="-p 8501:8501"
fi


# Amount of shared memory to give the Docker container access to
# If you get a Bus Error, increase this value. If you don't have enough memory
# on your machine, decrease this value.
SHM_SIZE=${SHM_SIZE:-8192m}

# Check docker image name
if [[ "$image" != @(flexflow-environment|flexflow) ]]; then
echo "Error, image name ${image} is invalid. Choose between 'flexflow-environment', 'flexflow'."
exit 1
fi

# Check GPU backend
if [[ "${FF_GPU_BACKEND}" != @(cuda|hip_cuda|hip_rocm|intel) ]]; then
echo "Error, value of FF_GPU_BACKEND (${FF_GPU_BACKEND}) is invalid. Pick between 'cuda', 'hip_cuda', 'hip_rocm' or 'intel'."
exit 1
elif [[ "${FF_GPU_BACKEND}" != "cuda" ]]; then
echo "Running $image docker image with gpu backend: ${FF_GPU_BACKEND}"
else
echo "Running $image docker image with default GPU backend: cuda"
fi

# gpu backend version suffix for the docker image.
gpu_backend_version=""

if [[ "${FF_GPU_BACKEND}" == "cuda" || "${FF_GPU_BACKEND}" == "hip_cuda" ]]; then
# Autodetect cuda version if not specified
if [[ $cuda_version == "empty" ]]; then
# shellcheck disable=SC2015
cuda_version=$(command -v nvcc >/dev/null 2>&1 && nvcc --version | grep "release" | awk '{print $NF}' || true)
# Convert cuda_version from e.g. V11.7.99 to 11.7
cuda_version=${cuda_version:1:4}
if [[ -z "$cuda_version" ]]; then
echo "Could not detect CUDA version. Please specify one manually by setting the 'cuda_version' env."
exit 1
fi
fi
# Check that CUDA version is supported
if [[ "$cuda_version" != @(11.1|11.2|11.3|11.4|11.5|11.6|11.7|11.8|12.0|12.1|12.2|12.3|12.4|12.5|12.6|12.7|12.8|12.9) ]]; then
echo "cuda_version is not supported, please choose among {11.1|11.2|11.3|11.4|11.5|11.6|11.7|11.8|12.0|12.1|12.2}"
exit 1
fi
# Use CUDA 12.2 for all versions greater or equal to 12.2 for now
if [[ "$cuda_version" == @(12.3|12.4|12.5|12.6|12.7|12.8|12.9) ]]; then
cuda_version=12.2
fi
# Set cuda version suffix to docker image name
echo "Running $image docker image with CUDA $cuda_version"
gpu_backend_version="-${cuda_version}"
fi

if [[ "${FF_GPU_BACKEND}" == "hip_rocm" || "${FF_GPU_BACKEND}" == "hip_cuda" ]]; then
# Autodetect HIP version if not specified
if [[ $hip_version == "empty" ]]; then
# shellcheck disable=SC2015
hip_version=$(command -v hipcc >/dev/null 2>&1 && hipcc --version | grep "HIP version:" | awk '{print $NF}' || true)
# Convert hip_version from e.g. 5.6.31061-8c743ae5d to 5.6
hip_version=${hip_version:0:3}
if [[ -z "$hip_version" ]]; then
echo "Could not detect HIP version. Please specify one manually by setting the 'hip_version' env."
exit 1
fi
fi
# Check that HIP version is supported
if [[ "$hip_version" != @(5.3|5.4|5.5|5.6) ]]; then
echo "hip_version is not supported, please choose among {5.3, 5.4, 5.5, 5.6}"
exit 1
fi
echo "Running $image docker image with HIP $hip_version"
if [[ "${FF_GPU_BACKEND}" == "hip_rocm" ]]; then
gpu_backend_version="-${hip_version}"
fi
fi

# Check that the image exists; if not, print an error message with pull/build instructions.
if [[ "$(docker images -q "${image}-${FF_GPU_BACKEND}${gpu_backend_version}":latest 2> /dev/null)" == "" ]]; then
echo "Error, ${image}-${FF_GPU_BACKEND}${gpu_backend_version}:latest does not exist!"
if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
echo ""
echo "To download the docker image, run:"
echo " FF_GPU_BACKEND=${FF_GPU_BACKEND} cuda_version=${cuda_version} $(pwd)/pull.sh $image"
echo "To build the docker image from source, run:"
echo " FF_GPU_BACKEND=${FF_GPU_BACKEND} cuda_version=${cuda_version} $(pwd)/build.sh $image"
echo ""
elif [[ "${FF_GPU_BACKEND}" == "hip_rocm" ]]; then
echo ""
echo "To download the docker image, run:"
echo " FF_GPU_BACKEND=${FF_GPU_BACKEND} hip_version=${hip_version} $(pwd)/pull.sh $image"
echo "To build the docker image from source, run:"
echo " FF_GPU_BACKEND=${FF_GPU_BACKEND} hip_version=${hip_version} $(pwd)/build.sh $image"
echo ""
fi
exit 1
fi

hf_token_volume=""
hf_token_path="$HOME/.cache/huggingface/token"
if [ -f "$hf_token_path" ]; then
# If the token exists, add the volume mount to the Docker command
hf_token_volume+="-v $hf_token_path:/root/.cache/huggingface/token"
fi

ssh_key_volume=""
ssh_key_path="$HOME/.ssh/id_rsa"
if [ -f "$ssh_key_path" ] && [ -f "$ssh_key_path.pub" ]; then
ssh_key_volume="-v $ssh_key_path:/root/.ssh/id_rsa -v $ssh_key_path.pub:/root/.ssh/id_rsa.pub"
fi
docker_command="docker run -v my-volume:/home -it $gpu_arg --shm-size=${SHM_SIZE} --cap-add=SYS_PTRACE ${ssh_key_volume} ${hf_token_volume} ${port_forward_arg} ${image}-${FF_GPU_BACKEND}${gpu_backend_version}:latest"
echo "$docker_command"
eval "$docker_command"
134 changes: 134 additions & 0 deletions docker/run-persistent.sh
@@ -0,0 +1,134 @@
#! /usr/bin/env bash
set -euo pipefail

# Usage: ./run-persistent.sh <docker_image_name>
# Optional environment variables: FF_GPU_BACKEND, cuda_version, hip_version, ATTACH_GPUS, SHM_SIZE

# Cd into directory holding this script
cd "${BASH_SOURCE[0]%/*}"

# Parse input params
image=${1:-flexflow}
FF_GPU_BACKEND=${FF_GPU_BACKEND:-cuda}
cuda_version=${cuda_version:-"empty"}
hip_version=${hip_version:-"empty"}

# Parameter controlling whether to attach GPUs to the Docker container
ATTACH_GPUS=${ATTACH_GPUS:-true}
gpu_arg=""
if $ATTACH_GPUS ; then gpu_arg="--gpus all" ; fi
FORWARD_STREAMLIT_PORT=${FORWARD_STREAMLIT_PORT:-true}
port_forward_arg=""
if $FORWARD_STREAMLIT_PORT ; then
port_forward_arg+="-p 8501:8501"
fi


# Amount of shared memory to give the Docker container access to
# If you get a Bus Error, increase this value. If you don't have enough memory
# on your machine, decrease this value.
SHM_SIZE=${SHM_SIZE:-8192m}

# Check docker image name
if [[ "$image" != @(flexflow-environment|flexflow) ]]; then
echo "Error, image name ${image} is invalid. Choose between 'flexflow-environment', 'flexflow'."
exit 1
fi

# Check GPU backend
if [[ "${FF_GPU_BACKEND}" != @(cuda|hip_cuda|hip_rocm|intel) ]]; then
echo "Error, value of FF_GPU_BACKEND (${FF_GPU_BACKEND}) is invalid. Pick between 'cuda', 'hip_cuda', 'hip_rocm' or 'intel'."
exit 1
elif [[ "${FF_GPU_BACKEND}" != "cuda" ]]; then
echo "Running $image docker image with gpu backend: ${FF_GPU_BACKEND}"
else
echo "Running $image docker image with default GPU backend: cuda"
fi

# gpu backend version suffix for the docker image.
gpu_backend_version=""

if [[ "${FF_GPU_BACKEND}" == "cuda" || "${FF_GPU_BACKEND}" == "hip_cuda" ]]; then
# Autodetect cuda version if not specified
if [[ $cuda_version == "empty" ]]; then
# shellcheck disable=SC2015
cuda_version=$(command -v nvcc >/dev/null 2>&1 && nvcc --version | grep "release" | awk '{print $NF}' || true)
# Convert cuda_version from e.g. V11.7.99 to 11.7
cuda_version=${cuda_version:1:4}
if [[ -z "$cuda_version" ]]; then
echo "Could not detect CUDA version. Please specify one manually by setting the 'cuda_version' env."
exit 1
fi
fi
# Check that CUDA version is supported
if [[ "$cuda_version" != @(11.1|11.2|11.3|11.4|11.5|11.6|11.7|11.8|12.0|12.1|12.2|12.3|12.4|12.5|12.6|12.7|12.8|12.9) ]]; then
echo "cuda_version is not supported, please choose among {11.1|11.2|11.3|11.4|11.5|11.6|11.7|11.8|12.0|12.1|12.2}"
exit 1
fi
# Use CUDA 12.2 for all versions greater or equal to 12.2 for now
if [[ "$cuda_version" == @(12.3|12.4|12.5|12.6|12.7|12.8|12.9) ]]; then
cuda_version=12.2
fi
# Set cuda version suffix to docker image name
echo "Running $image docker image with CUDA $cuda_version"
gpu_backend_version="-${cuda_version}"
fi

if [[ "${FF_GPU_BACKEND}" == "hip_rocm" || "${FF_GPU_BACKEND}" == "hip_cuda" ]]; then
# Autodetect HIP version if not specified
if [[ $hip_version == "empty" ]]; then
# shellcheck disable=SC2015
hip_version=$(command -v hipcc >/dev/null 2>&1 && hipcc --version | grep "HIP version:" | awk '{print $NF}' || true)
# Convert hip_version from e.g. 5.6.31061-8c743ae5d to 5.6
hip_version=${hip_version:0:3}
if [[ -z "$hip_version" ]]; then
echo "Could not detect HIP version. Please specify one manually by setting the 'hip_version' env."
exit 1
fi
fi
# Check that HIP version is supported
if [[ "$hip_version" != @(5.3|5.4|5.5|5.6) ]]; then
echo "hip_version is not supported, please choose among {5.3, 5.4, 5.5, 5.6}"
exit 1
fi
echo "Running $image docker image with HIP $hip_version"
if [[ "${FF_GPU_BACKEND}" == "hip_rocm" ]]; then
gpu_backend_version="-${hip_version}"
fi
fi

# Check that the image exists; if not, print an error message with pull/build instructions.
if [[ "$(docker images -q "${image}-${FF_GPU_BACKEND}${gpu_backend_version}":latest 2> /dev/null)" == "" ]]; then
echo "Error, ${image}-${FF_GPU_BACKEND}${gpu_backend_version}:latest does not exist!"
if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
echo ""
echo "To download the docker image, run:"
echo " FF_GPU_BACKEND=${FF_GPU_BACKEND} cuda_version=${cuda_version} $(pwd)/pull.sh $image"
echo "To build the docker image from source, run:"
echo " FF_GPU_BACKEND=${FF_GPU_BACKEND} cuda_version=${cuda_version} $(pwd)/build.sh $image"
echo ""
elif [[ "${FF_GPU_BACKEND}" == "hip_rocm" ]]; then
echo ""
echo "To download the docker image, run:"
echo " FF_GPU_BACKEND=${FF_GPU_BACKEND} hip_version=${hip_version} $(pwd)/pull.sh $image"
echo "To build the docker image from source, run:"
echo " FF_GPU_BACKEND=${FF_GPU_BACKEND} hip_version=${hip_version} $(pwd)/build.sh $image"
echo ""
fi
exit 1
fi

#cache_volume="-v ${HOME}/.cache:/root/.cache"
#home_volume="-v ${HOME}/dockerhome:/home"
cache_volume="-v cache_volume:/root/.cache"
home_volume="-v home_volume:/home"

ssh_key_volume=""
ssh_key_path="$HOME/.ssh/id_rsa"
if [ -f "$ssh_key_path" ] && [ -f "$ssh_key_path.pub" ]; then
ssh_key_volume="-v $ssh_key_path:/root/.ssh/id_rsa -v $ssh_key_path.pub:/root/.ssh/id_rsa.pub"
fi

docker_command="docker run -it $gpu_arg --shm-size=${SHM_SIZE} --cap-add=SYS_PTRACE ${ssh_key_volume} ${cache_volume} ${home_volume} ${port_forward_arg} ${image}-${FF_GPU_BACKEND}${gpu_backend_version}:latest"
echo "$docker_command"
eval "$docker_command"
5 changes: 4 additions & 1 deletion docker/run.sh
@@ -130,4 +130,7 @@ ssh_key_path="$HOME/.ssh/id_rsa"
if [ -f "$ssh_key_path" ] && [ -f "$ssh_key_path.pub" ]; then
ssh_key_volume="-v $ssh_key_path:/root/.ssh/id_rsa -v $ssh_key_path.pub:/root/.ssh/id_rsa.pub"
fi
-eval docker run -it "$gpu_arg" "--shm-size=${SHM_SIZE}" "--cap-add=SYS_PTRACE" "${ssh_key_volume}" "${hf_token_volume}" "${port_forward_arg}" "${image}-${FF_GPU_BACKEND}${gpu_backend_version}:latest"
+
+docker_command="docker run -it $gpu_arg --shm-size=${SHM_SIZE} --cap-add=SYS_PTRACE ${ssh_key_volume} ${hf_token_volume} ${port_forward_arg} ${image}-${FF_GPU_BACKEND}${gpu_backend_version}:latest"
+echo "$docker_command"
+eval "$docker_command"