Skip to content
This repository has been archived by the owner on Mar 30, 2024. It is now read-only.

Commit

Permalink
Support stablecode, improve gpt-neox CI (#52)
Browse files Browse the repository at this point in the history
Signed-off-by: Hung-Han (Henry) Chen <[email protected]>
  • Loading branch information
chenhunghan authored Aug 13, 2023
1 parent 06da3db commit afb5db7
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 4 deletions.
85 changes: 83 additions & 2 deletions .github/workflows/smoke_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,16 @@ env:
LLAMA_MODEL_HG_REPO_ID: TheBloke/orca_mini_3B-GGML
LLAMA_MODEL_FILE: orca-mini-3b.ggmlv3.q4_0.bin
LLAMA_SVC_PORT: 8000
# for testing gpt-neox base models
GPT_NEOX_HELM_RELEASE_NAME: stablecode-instruct-alpha-3b
GPT_NEOX_MODEL_HG_REPO_ID: TheBloke/stablecode-instruct-alpha-3b-GGML
GPT_NEOX_MODEL_FILE: stablecode-instruct-alpha-3b.ggmlv1.q4_0.bin
GPT_NEOX_SVC_PORT: 8001
# for testing starcoder base models
STARCODER_HELM_RELEASE_NAME: tiny-starcoder-py
STARCODER_MODEL_HG_REPO_ID: mike-ravkine/tiny_starcoder_py-GGML
STARCODER_MODEL_FILE: tiny_starcoder_py-q8_0.bin
STARCODER_SVC_PORT: 8001
STARCODER_SVC_PORT: 8002

jobs:
build-image:
Expand Down Expand Up @@ -140,6 +145,82 @@ jobs:
- if: always()
run: |
kubectl logs --tail=20 --selector app.kubernetes.io/name=$LLAMA_HELM_RELEASE_NAME -n $HELM_NAMESPACE
# Smoke test for gpt-neox based models (stablecode-instruct-alpha-3b):
# install the ialacol chart into a Kind cluster, wait for the pod, then
# exercise the OpenAI-compatible completions endpoint through the CLI.
gpt-neox-smoke-test:
  runs-on: ubuntu-latest
  needs: build-image
  steps:
    - name: Create k8s Kind Cluster
      uses: helm/[email protected]

    - name: Set up Helm
      uses: azure/setup-helm@v3
      with:
        version: v3.12.0

    - uses: actions/setup-python@v4
      with:
        # Quoted: unquoted version-like scalars are YAML floats (3.10 -> 3.1).
        python-version: "3.11"
    - name: Install OpenAI CLI
      run: |
        pip install --upgrade openai --quiet
    - name: Install ialacol with gpt-neox based model and wait for pods to be ready
      run: |
        helm repo add ialacol https://chenhunghan.github.io/ialacol
        helm repo update
        # Unquoted heredoc delimiter so $GPT_NEOX_* env vars expand at shell
        # time while ${{ ... }} was already expanded by the Actions runner.
        cat > values.yaml <<EOF
        replicas: 1
        deployment:
          image: ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.IMAGE_NAME }}:${{ github.sha }}
          env:
            DEFAULT_MODEL_HG_REPO_ID: $GPT_NEOX_MODEL_HG_REPO_ID
            DEFAULT_MODEL_FILE: $GPT_NEOX_MODEL_FILE
            LOGGING_LEVEL: $LOGGING_LEVEL
            # Quoted: Kubernetes env values must be strings, not ints/floats.
            # NOTE(review): assumes the chart passes these through verbatim —
            # confirm against the ialacol chart templates.
            TOP_K: "40"
            REPETITION_PENALTY: "1.176"
          resources:
            {}
        cache:
          persistence:
            size: 0.5Gi
            accessModes:
              - ReadWriteOnce
        cacheMountPath: /app/cache
        model:
          persistence:
            size: 0.5Gi
            accessModes:
              - ReadWriteOnce
        modelMountPath: /app/models
        service:
          type: ClusterIP
          port: $GPT_NEOX_SVC_PORT
        annotations: {}
        nodeSelector: {}
        tolerations: []
        affinity: {}
        EOF
        helm install $GPT_NEOX_HELM_RELEASE_NAME ialacol/ialacol -f values.yaml --namespace $HELM_NAMESPACE
        # Wait on actual pod readiness instead of a fixed sleep: the model is
        # ~1.93GB, so download time varies with runner bandwidth and a blind
        # `sleep 40` is flaky. The timeout bounds the worst case.
        echo "Waiting for the pod to become ready (downloads a ~1.93GB model)"
        kubectl wait --for=condition=ready pod --selector app.kubernetes.io/name=$GPT_NEOX_HELM_RELEASE_NAME -n $HELM_NAMESPACE --timeout=300s
    - if: always()
      run: |
        kubectl get pods -n $HELM_NAMESPACE
    - if: always()
      run: |
        kubectl logs --tail=200 --selector app.kubernetes.io/name=$GPT_NEOX_HELM_RELEASE_NAME -n $HELM_NAMESPACE
    - name: Port forward to the gpt-neox model service
      run: |
        kubectl port-forward svc/$GPT_NEOX_HELM_RELEASE_NAME $GPT_NEOX_SVC_PORT:$GPT_NEOX_SVC_PORT &
        echo "Wait for port-forward to be ready"
        sleep 5
    - name: Check model response
      run: |
        openai -k "sk-fake" -b http://localhost:$GPT_NEOX_SVC_PORT/v1 -vvvvv api completions.create -m $GPT_NEOX_MODEL_FILE -p "A function adding 1 to 1 in Python."
    - if: always()
      run: |
        kubectl logs --tail=20 --selector app.kubernetes.io/name=$GPT_NEOX_HELM_RELEASE_NAME -n $HELM_NAMESPACE
starcoder-smoke-test:
runs-on: ubuntu-latest
needs: build-image
Expand Down Expand Up @@ -183,7 +264,7 @@ jobs:
cacheMountPath: /app/cache
model:
persistence:
size: 0.5Gi
size: 2Gi
accessModes:
- ReadWriteOnce
modelMountPath: /app/models
Expand Down
4 changes: 2 additions & 2 deletions charts/ialacol/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Helm chart metadata for ialacol.
# Duplicate appVersion/version keys (a before/after diff artifact) removed:
# duplicate mapping keys are invalid in YAML 1.2 and silently last-wins in
# most parsers. Post-commit values (0.10.1) kept.
apiVersion: v2
# Quoted per Helm convention: appVersion is a free-form string field.
appVersion: "0.10.1"
description: A Helm chart for ialacol
name: ialacol
type: application
version: 0.10.1
3 changes: 3 additions & 0 deletions get_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ async def get_llm(
ctransformer_model_type = "dolly-v2"
if "stablelm" in body.model:
ctransformer_model_type = "gpt_neox"
# matching https://huggingface.co/stabilityai/stablecode-completion-alpha-3b
if "stablecode" in body.model:
ctransformer_model_type = "gpt_neox"
config = get_config(body)
MODE_TYPE = get_env("MODE_TYPE", "")
if len(MODE_TYPE) > 0:
Expand Down

0 comments on commit afb5db7

Please sign in to comment.