structured output for /completion API

sixianyi0721 committed Jan 18, 2025
1 parent 3a9468c commit 72064c4
Showing 5 changed files with 40 additions and 11 deletions.
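The changes below thread a response_format argument through the /completion code path and re-enable the structured-output tests that exercise it. As a rough illustration of what this enables on the client side, here is a minimal sketch assuming the llama-stack-client Python SDK; the base URL, model id, schema fields, and exact parameter names are illustrative assumptions rather than part of this commit.

# Minimal sketch: constrain a /completion response to a JSON schema.
# Assumes llama_stack_client's inference.completion accepts a response_format dict.
from pydantic import BaseModel
from llama_stack_client import LlamaStackClient


class AnswerFormat(BaseModel):
    name: str
    year_born: str


client = LlamaStackClient(base_url="http://localhost:5001")  # assumed local stack URL
response = client.inference.completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",
    content="Michael Jordan was born in 1963. Return his name and birth year.",
    stream=False,
    response_format={
        "type": "json_schema",
        "json_schema": AnswerFormat.model_json_schema(),
    },
)
print(response.content)  # expected to parse as AnswerFormat JSON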
27 changes: 18 additions & 9 deletions .github/workflows/publish-to-test-pypi.yml
@@ -1,14 +1,15 @@
 name: Publish Python 🐍 distribution 📦 to TestPyPI
 
 on:
-  workflow_dispatch: # Keep manual trigger
-    inputs:
-      version:
-        description: 'Version number (e.g. 0.0.63.dev20250111)'
-        required: true
-        type: string
-  schedule:
-    - cron: "0 0 * * *" # Run every day at midnight
+  push:
+  # workflow_dispatch: # Keep manual trigger
+  #   inputs:
+  #     version:
+  #       description: 'Version number (e.g. 0.0.63.dev20250111)'
+  #       required: true
+  #       type: string
+  # schedule:
+  #   - cron: "0 0 * * *" # Run every day at midnight
 
 jobs:
   trigger-client-and-models-build:
@@ -201,7 +202,9 @@ jobs:
     runs-on: ubuntu-latest
     env:
       TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
+      FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
       TAVILY_SEARCH_API_KEY: ${{ secrets.TAVILY_SEARCH_API_KEY }}
+
     steps:
       - uses: actions/checkout@v4
         with:
@@ -241,4 +244,10 @@ jobs:
           pytest -v -s --nbval-lax ./docs/notebooks/Llama_Stack_Building_AI_Applications.ipynb
           pytest -v -s --nbval-lax ./docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
       # TODO: add trigger for integration test workflow & docker builds
+      - name: Integration tests
+        if: always()
+        run: |
+          llama stack build --template fireworks --image-type venv
+          llama stack build --template together --image-type venv
+          pip install pytest_html
+          pytest ./llama_stack/providers/tests/ --config=github_ci_test_config.yaml
1 change: 1 addition & 0 deletions llama_stack/providers/remote/inference/ollama/ollama.py
@@ -172,6 +172,7 @@ async def completion(
             model=model.provider_resource_id,
             content=content,
             sampling_params=sampling_params,
+            response_format=response_format,
             stream=stream,
             logprobs=logprobs,
         )
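The one-line change above simply forwards response_format into the request the Ollama provider builds; the provider is then responsible for mapping it onto the backend's own structured-output mechanism. A plausible sketch of that mapping step follows; the helper name, request fields, and option handling are assumptions, not the provider's actual code.

# Illustrative only: one way a provider could translate a Llama Stack
# response_format into Ollama's /api/generate parameters. Field and type
# names here are assumptions.
def build_ollama_generate_params(request) -> dict:
    params = {
        "model": request.model,
        "prompt": request.content,
        "raw": True,
        "stream": request.stream,
    }
    fmt = getattr(request, "response_format", None)
    if fmt is not None:
        if fmt.type == "json_schema":
            # Ollama's structured outputs accept a JSON schema in the `format` field.
            params["format"] = fmt.json_schema
        else:
            raise NotImplementedError(f"Unsupported response format: {fmt.type}")
    return params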
18 changes: 18 additions & 0 deletions llama_stack/providers/tests/github_ci_test_config.yaml
@@ -0,0 +1,18 @@
+inference:
+  tests:
+    - inference/test_vision_inference.py::test_vision_chat_completion_streaming
+    - inference/test_vision_inference.py::test_vision_chat_completion_non_streaming
+    - inference/test_text_inference.py::test_structured_output
+    - inference/test_text_inference.py::test_chat_completion_streaming
+    - inference/test_text_inference.py::test_chat_completion_non_streaming
+    - inference/test_text_inference.py::test_chat_completion_with_tool_calling
+    - inference/test_text_inference.py::test_chat_completion_with_tool_calling_streaming
+
+  scenarios:
+    - fixture_combo_id: fireworks
+    - provider_fixtures:
+        inference: together
+
+  inference_models:
+    - meta-llama/Llama-3.1-8B-Instruct
+    - meta-llama/Llama-3.2-11B-Vision-Instruct
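The Integration tests step added to the workflow above consumes this file directly (pytest ./llama_stack/providers/tests/ --config=github_ci_test_config.yaml), so the same selection can be reproduced locally with that command.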
1 change: 0 additions & 1 deletion llama_stack/providers/tests/inference/test_text_inference.py
@@ -208,7 +208,6 @@ async def test_completion_logprobs(self, inference_model, inference_stack):
         assert not chunk.logprobs, "Logprobs should be empty"
 
     @pytest.mark.asyncio(loop_scope="session")
-    @pytest.mark.skip("This test is not quite robust")
     async def test_completion_structured_output(self, inference_model, inference_stack):
         inference_impl, _ = inference_stack
 
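Removing the skip marker above puts the provider-level structured-output test for /completion back into the suite. Its shape is roughly the sketch below; the schema fields, prompt, and exact response_format payload are assumptions inferred from the surrounding fixtures, not the verbatim test body.

# Sketch of the test's shape, not its verbatim body. Assumes pytest-asyncio
# fixtures named inference_model and inference_stack, as in the diff above.
import pytest
from pydantic import BaseModel


class AnswerFormat(BaseModel):
    name: str
    year_born: str


@pytest.mark.asyncio(loop_scope="session")
async def test_completion_structured_output(inference_model, inference_stack):
    inference_impl, _ = inference_stack
    response = await inference_impl.completion(
        model_id=inference_model,
        content="Michael Jordan was born in 1963. Extract his name and birth year.",
        stream=False,
        response_format={
            "type": "json_schema",
            "json_schema": AnswerFormat.model_json_schema(),
        },
    )
    # If the provider honored the schema, the completion parses cleanly.
    answer = AnswerFormat.model_validate_json(response.content)
    assert answer.year_born == "1963"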
4 changes: 3 additions & 1 deletion tests/client-sdk/inference/test_inference.py
@@ -11,7 +11,7 @@
 from pydantic import BaseModel
 
 PROVIDER_TOOL_PROMPT_FORMAT = {
-    "remote::ollama": "python_list",
+    "remote::ollama": "json",
     "remote::together": "json",
     "remote::fireworks": "json",
@@ -107,6 +107,7 @@ def test_completion_streaming(llama_stack_client, text_model_id):
     assert "blue" in "".join(streamed_content).lower().strip()
 
 
+@pytest.mark.skip
 def test_completion_log_probs_non_streaming(llama_stack_client, text_model_id):
     response = llama_stack_client.inference.completion(
         content="Complete the sentence: Micheael Jordan is born in ",
@@ -124,6 +125,7 @@ def test_completion_log_probs_non_streaming(llama_stack_client, text_model_id):
     assert all(len(logprob.logprobs_by_token) == 3 for logprob in response.logprobs)
 
 
+@pytest.mark.skip
 def test_completion_log_probs_streaming(llama_stack_client, text_model_id):
     response = llama_stack_client.inference.completion(
         content="Complete the sentence: Micheael Jordan is born in ",
