This repository has been archived by the owner on Mar 30, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for download model from a specific revision (#77)
Signed-off-by: Hung-Han (Henry) Chen <[email protected]>
- Loading branch information
1 parent
13d4334
commit e22bd33
Showing
6 changed files
with
138 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,12 @@ | ||
name: Smoke Test | ||
|
||
on: | ||
pull_request: | ||
branches: | ||
- main | ||
on: pull_request | ||
|
||
env: | ||
REGISTRY: quay.io | ||
REPO_ORG_NAME: ialacol | ||
IMAGE_NAME: ialacol-smoke-test | ||
GPTQ_IMAGE_TAG: gptq | ||
HELM_NAMESPACE: default | ||
LOGGING_LEVEL: DEBUG | ||
# for testing llama base models | ||
|
@@ -26,6 +24,12 @@ env: | |
STARCODER_MODEL_HG_REPO_ID: mike-ravkine/tiny_starcoder_py-GGML | ||
STARCODER_MODEL_FILE: tiny_starcoder_py-q8_0.bin | ||
STARCODER_SVC_PORT: 8002 | ||
# for testing gptq models | ||
GPTQ_HELM_RELEASE_NAME: stablecode-instruct-alpha-3b-gptq | ||
GPTQ_MODEL_HG_REPO_ID: TheBloke/stablecode-instruct-alpha-3b-GPTQ | ||
GPTQ_MODEL_HG_REVISION: gptq-4bit-32g-actorder_True | ||
GPTQ_MODEL_FILE: model.safetensors | ||
GPTQ_SVC_PORT: 8003 | ||
|
||
jobs: | ||
build-image: | ||
|
@@ -36,7 +40,7 @@ jobs: | |
with: | ||
fetch-depth: 0 | ||
|
||
- name: Login to Github Container Registry | ||
- name: Login to Registry | ||
uses: docker/login-action@v2 | ||
with: | ||
registry: ${{ env.REGISTRY }} | ||
|
@@ -51,6 +55,29 @@ jobs: | |
push: true | ||
tags: | | ||
${{ env.REGISTRY }}/${{ env.REPO_ORG_NAME }}/${{ env.IMAGE_NAME }}:${{ github.sha }} | ||
# Builds the image described by ./Dockerfile.cuda12 and pushes it to the
# registry under the fixed GPTQ_IMAGE_TAG tag; the gptq-smoke-test job
# declares this job in its `needs`.
build-gptq-cuda12-image:
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@v3
      with:
        fetch-depth: 0

    # Credentials come from the repository secrets for the Quay robot account.
    - name: Login to Registry
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.QUAY_ROBOT_USERNAME }}
        password: ${{ secrets.QUAY_ROBOT_PASSWORD }}
        registry: ${{ env.REGISTRY }}

    # Image reference is <REGISTRY>/<REPO_ORG_NAME>/<IMAGE_NAME>:<GPTQ_IMAGE_TAG>.
    - name: Build and push Docker image
      uses: docker/build-push-action@v4
      with:
        file: ./Dockerfile.cuda12
        context: .
        push: true
        tags: |
          ${{ env.REGISTRY }}/${{ env.REPO_ORG_NAME }}/${{ env.IMAGE_NAME }}:${{ env.GPTQ_IMAGE_TAG }}
llama-smoke-test: | ||
runs-on: ubuntu-latest | ||
needs: build-image | ||
|
@@ -274,3 +301,79 @@ jobs: | |
- if: always() | ||
run: | | ||
kubectl logs --tail=200 --selector app.kubernetes.io/name=$STARCODER_HELM_RELEASE_NAME -n $HELM_NAMESPACE | ||
# Smoke test for GPTQ models downloaded from a specific Hugging Face revision.
# Installs the chart into a Kind cluster with the image pushed by
# build-gptq-cuda12-image, then checks that the model listing endpoint answers.
gptq-smoke-test:
  runs-on: ubuntu-latest
  needs: build-gptq-cuda12-image
  steps:
    # NOTE(review): the action ref below was mangled by e-mail obfuscation in
    # the scraped page; restore the original `helm/kind-action@<version>` pin.
    - name: Create k8s Kind Cluster
      uses: helm/[email protected]

    - name: Set up Helm
      uses: azure/setup-helm@v3
      with:
        version: v3.12.0

    - name: Checkout
      uses: actions/checkout@v3
      with:
        fetch-depth: 0

    - name: Install ialacol with GPTQ model from a revision and wait for pods to be ready
      run: |
        cat > values.yaml <<EOF
        replicas: 1
        deployment:
          image: ${{ env.REGISTRY }}/${{ env.REPO_ORG_NAME }}/${{ env.IMAGE_NAME }}:${{ env.GPTQ_IMAGE_TAG }}
          env:
            DEFAULT_MODEL_HG_REPO_ID: $GPTQ_MODEL_HG_REPO_ID
            DEFAULT_MODEL_HG_REPO_REVISION: $GPTQ_MODEL_HG_REVISION
            DEFAULT_MODEL_FILE: $GPTQ_MODEL_FILE
            MODEL_TYPE: "gptq"
            LOGGING_LEVEL: $LOGGING_LEVEL
        resources:
          {}
        model:
          persistence:
            size: 3Gi
            accessModes:
              - ReadWriteOnce
        service:
          type: ClusterIP
          port: $GPTQ_SVC_PORT
          annotations: {}
        nodeSelector: {}
        tolerations: []
        affinity: {}
        EOF
        helm install $GPTQ_HELM_RELEASE_NAME -f values.yaml --namespace $HELM_NAMESPACE ./charts/ialacol
        echo "Wait for the pod to be ready, it takes about 36s to download a 1.93GB model (~50MB/s)"
        sleep 40
    # Diagnostics: run even when the install step failed.
    - if: always()
      run: |
        kubectl get pods -n $HELM_NAMESPACE
    - if: always()
      run: |
        kubectl logs --tail=200 --selector app.kubernetes.io/name=$GPTQ_HELM_RELEASE_NAME -n $HELM_NAMESPACE
    - name: Port forward to the GPTQ model service
      run: |
        kubectl port-forward svc/$GPTQ_HELM_RELEASE_NAME $GPTQ_SVC_PORT:$GPTQ_SVC_PORT &
        echo "Wait for port-forward to be ready"
        sleep 5
    - name: Check the GET /v1/models endpoint
      run: |
        curl http://localhost:$GPTQ_SVC_PORT/v1/models
    - uses: actions/setup-python@v4
      with:
        # Quoted so the version is always handled as a string, never a float.
        python-version: "3.11"
    - name: Install OpenAI CLI
      run: |
        pip install --upgrade openai --quiet
    # We can only test if download works and if GET /models returns something on CPU CI workers
    - name: Test the OpenAI CLI with default parameters
      run: |
        openai -k "sk-fake" -b http://localhost:$GPTQ_SVC_PORT/v1 api models.list
    # Final log dump for this job's own release (previously selected the
    # llama release name, which does not belong to this job).
    - if: always()
      run: |
        kubectl logs --tail=200 --selector app.kubernetes.io/name=$GPTQ_HELM_RELEASE_NAME -n $HELM_NAMESPACE
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
apiVersion: v2 | ||
appVersion: 0.11.5 | ||
appVersion: 0.12.0 | ||
description: A Helm chart for ialacol | ||
name: ialacol | ||
type: application | ||
version: 0.11.5 | ||
version: 0.12.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters