diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index b26513b..c5cd672 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -8,44 +8,29 @@ permissions: contents: read jobs: - test: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: pip - cache-dependency-path: pyproject.toml - - name: Install dependencies - run: | - pip install -e '.[test]' - - name: Run tests - run: | - python -m pytest + call-test: + uses: ./.github/workflows/test.yml deploy: runs-on: ubuntu-latest - needs: [test] + needs: [call-test] environment: release permissions: id-token: write steps: - uses: actions/checkout@v4 - - name: Set up Python + - name: Install uv + uses: astral-sh/setup-uv@v2 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + - name: Set up Python 3.12 uses: actions/setup-python@v5 with: python-version: "3.12" - cache: pip - cache-dependency-path: pyproject.toml - name: Install dependencies - run: | - pip install setuptools wheel build + run: uv sync --all-extras --dev --python 3.12 --python-preference only-system - name: Build run: | - python -m build + uv build - name: Publish uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6a25b10..f3ba704 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,6 +1,6 @@ name: Test -on: [push, pull_request] +on: [push, pull_request, workflow_call] permissions: contents: read diff --git a/README.md b/README.md index 7c254c7..29b19dd 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ 
[![Tests](https://github.com/rectalogic/llm-transformers/actions/workflows/test.yml/badge.svg)](https://github.com/rectalogic/llm-transformers/actions/workflows/test.yml) [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/rectalogic/llm-transformers/blob/main/LICENSE) -Plugin for llm adding support for [🤗 Hugging Face Transformers](https://huggingface.co/docs/transformers/index) +Plugin for [llm](https://llm.datasette.io/) adding support for 🤗 [Hugging Face Transformers](https://huggingface.co/docs/transformers/index) [pipeline](https://huggingface.co/docs/transformers/pipeline_tutorial) tasks. ## Installation @@ -13,12 +13,25 @@ Install this plugin in the same environment as [LLM](https://llm.datasette.io/). ```bash llm install llm-transformers ``` +Some pipelines that accept audio/video inputs require the [ffmpeg](https://ffmpeg.org/) executable to be installed. +The [`document-question-answering`](#document-question-answering) pipeline uses `pytesseract` which requires the [tesseract](https://tesseract-ocr.github.io/) executable. + ## Usage -XXX document `-o verbose True` -XXX HF_TOKEN/key usage +This plugin exposes 🤗 Hugging Face transformers pipelines, the "model" name is `transformers` and the pipeline task and/or Hugging Face model are specified as model options, e.g.: +```sh-session +$ llm -m transformers -o task text-generation "A dog has" +$ llm -m transformers -o model facebook/musicgen-small "techno music" +``` +If only `-o task <task>` is specified, the default model for that task will be used. +If only `-o model <model>` is specified, the task will be inferred from the model. +If both are specified, then the model must be compatible with the task. + +Transformers logging is verbose and disabled by default. +Specify the `-o verbose True` model option to enable it.
-Most models are freely accessible, some of them require accepting a license agreement and using a Hugging Face [API token](https://huggingface.co/settings/tokens) that has access to the model. +Most 🤗 Hugging Face models are freely accessible, some of them require accepting a license agreement +and using a Hugging Face [API token](https://huggingface.co/settings/tokens) that has access to the model. You can use `llm keys set huggingface`, or set the `HF_TOKEN` env var, or use the `--key` option to `llm`. ```sh-session @@ -29,6 +42,15 @@ $ llm --key hf_******************** -m transformers -o model meta-llama/Llama-3. A dog has been named as the killer of a woman who was found dead in her home. ``` +Some pipelines generate binary (audio, image, video) output, these are written to a temporary file +and the path to the file is returned. +A specific file can be specified with the `-o output <path>` model option. +The suffix specifies the file type (e.g. `.png` vs `.jpg` etc). + +Pipelines can be tuned by passing additional keyword arguments to the pipeline call. +These are specified as a JSON string in the `-o kwargs '<json>'` model option. +See the documentation for a specific pipeline for information on additional keyword arguments. + ## Transformer Pipeline Tasks ### [audio-classification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.AudioClassificationPipeline) @@ -62,7 +84,6 @@ XXX embed image here? ### [document-question-answering](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.DocumentQuestionAnsweringPipeline) The `document-question-answering` task requires a `context` option which is a file or URL to an image: - ```sh-session $ llm -m transformers -o task document-question-answering -o context https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png "What is the invoice number?" us-001 @@ -74,7 +95,6 @@ Not supported.
### [fill-mask](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.FillMaskPipeline) `fill-mask` requires a placeholder in the prompt, thiis is typically `` but is different for different models: - ```sh-session $ llm -m transformers -o task fill-mask "My is about to explode" My brain is about to explode (score=0.09140042215585709) @@ -174,7 +194,6 @@ $ llm -m transformers -o task summarization -o kwargs '{"min_length": 2, "max_le ### [table-question-answering](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TableQuestionAnsweringPipeline) `table-question-answering` takes a required `context` option - a path to a CSV file. - ```sh-session $ cat < /tmp/t.csv > Repository,Stars,Contributors,Programming language @@ -205,7 +224,6 @@ POSITIVE (0.9997681975364685) ### [text-generation](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TextGenerationPipeline) Some `text-generation` models can be chatted with. - ```sh-session $ llm -m transformers -o task text-generation "I am going to elect" I am going to elect the president of Mexico and that president should vote for our president," he said. "That's not very popular. That's not the American way. I would not want voters to accept the fact that that guy's running a @@ -224,12 +242,14 @@ Your question was: "What is the capital of France?" ### [text-to-audio](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TextToAudioPipeline) -`text-to-audio` generates audio, the response is the path to the audio file. +`text-to-audio` generates audio, the response is the path to the generated audio file. 
```sh-session $ llm -m transformers -o kwargs '{"generate_kwargs": {"max_new_tokens": 100}}' -o model facebook/musicgen-small "techno music" /var/folders/b1/1j9kkk053txc5krqbh0lj5t00000gn/T/tmpoueh05y6.wav $ llm -m transformers -o task text-to-audio "Hello world" /var/folders/b1/1j9kkk053txc5krqbh0lj5t00000gn/T/tmpmpwhkd8p.wav +$ llm -m transformers -o task text-to-audio -o model facebook/mms-tts-eng -o output /tmp/speech.flac "Hello world" +/tmp/speech.flac ``` ### [token-classification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TokenClassificationPipeline) @@ -243,7 +263,6 @@ London (I-LOC: 0.998397171497345) ### [translation_xx_to_yy](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TranslationPipeline) Substitute the from and to language codes into the task name, e.g. from `en` to `fr` would use task `translation_en_to_fr`: - ```sh-session $ llm -m transformers -o task translation_en_to_fr "How old are you?" quel âge êtes-vous? @@ -252,7 +271,6 @@ $ llm -m transformers -o task translation_en_to_fr "How old are you?" 
### [video-classification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.VideoClassificationPipeline) `video-classification` task expects a video path or URL as the prompt: - ```sh-session $ llm -m transformers -o task video-classification https://huggingface.co/datasets/Xuehai/MMWorld/resolve/main/Amazing%20street%20dance%20performance%20from%20Futunity%20UK%20-%20Move%20It%202013/Amazing%20street%20dance%20performance%20from%20Futunity%20UK%20-%20Move%20It%202013.mp4 dancing ballet (0.006608937866985798) @@ -265,7 +283,6 @@ punching bag (0.00565463537350297) ### [visual-question-answering](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.VisualQuestionAnsweringPipeline) `visual-question-answering` task requires an `context` option - a file or URL to an image: - ```sh-session $ llm -m transformers -o task visual-question-answering -o context https://huggingface.co/datasets/Narsil/image_dummy/raw/main/lena.png "What is she wearing?" hat (0.9480269551277161) @@ -278,7 +295,6 @@ nothing (0.0020962499547749758) ### [zero-shot-classification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.ZeroShotClassificationPipeline) `zero-shot-classification` requires a comma separated list of labels to be specified in the `context` model option: - ```sh-session $ llm -m transformers -o task zero-shot-classification -o context "urgent,not urgent,phone,tablet,computer" "I have a problem with my iphone that needs to be resolved asap!!" urgent (0.5036348700523376) @@ -291,7 +307,6 @@ tablet (0.0023087668232619762) ### [zero-shot-image-classification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.ZeroShotImageClassificationPipeline) `zero-shot-image-classification` requires a comma separated list of labels to be specified in the `context` model option. 
The prompt is a path or URL to an image: - ```sh-session $ llm -m transformers -o task zero-shot-image-classification -o context "black and white,photorealist,painting" https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png black and white (0.9736384749412537) @@ -301,8 +316,8 @@ painting (0.004946451168507338) ### [zero-shot-audio-classification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.ZeroShotAudioClassificationPipeline) -`zero-shot-audio-classification` requires a comma separated list of labels to be specified in the `context` model option. The prompt is a path or URL to an audio: - +`zero-shot-audio-classification` requires a comma separated list of labels to be specified in the `context` model option. +The prompt is a path or URL to an audio: ```sh-session $ llm -m transformers -o task zero-shot-audio-classification -o context "Sound of a bird,Sound of a dog" https://huggingface.co/datasets/s3prl/Nonspeech/resolve/main/animal_sound/n52.wav Sound of a bird (0.9998763799667358) @@ -313,7 +328,6 @@ Sound of a dog (0.00012355657236184925) `zero-shot-object-detection` requires a comma separated list of labels to be specified in the `context` model option. The prompt is a path or URL to an image. The response is JSON and includes a bounding box for each label: - ```sh-session $ llm -m transformers -o task zero-shot-object-detection -o context "cat,couch" http://images.cocodataset.org/val2017/000000039769.jpg [ @@ -350,20 +364,13 @@ $ llm -m transformers -o task zero-shot-object-detection -o context "cat,couch" ] ``` - ## Development -To set up this plugin locally, first checkout the code. 
Then create a new virtual environment: -```bash -cd llm-transformers -python -m venv venv -source venv/bin/activate -``` -Now install the dependencies and test dependencies: -```bash -llm install -e '.[test]' -``` -To run the tests: -```bash -python -m pytest -``` +To set up this plugin locally, first checkout the code and install [`uv`](https://docs.astral.sh/uv/). +`uv sync` to create a `venv` and install, then run tests: +```sh-session +$ uv sync --dev +$ uv run pytest +$ uv run ruff check +$ uv run ruff format --check +``` \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 4df81d2..4909dcc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,14 +14,14 @@ classifiers = [ dependencies = [ "llm>=0.16", "transformers>=4.45.1", - "torch", - "soundfile", - "pillow", - "pytesseract", - "timm", - "protobuf", - "pandas", - "av", + "torch>=2.4.1", + "soundfile>=0.12.1", + "pillow>=10.4.0", + "pytesseract>=0.3.13", + "timm>=1.0.9", + "protobuf>=5.28.2", + "pandas>=2.2.3", + "av>=13.0.0", "numpy>=2.1.1", ] @@ -84,4 +84,4 @@ markers = [ "llm1: marks a subset of llm tests into their own group", "llm2: marks a subset of llm tests into their own group", "llm3: marks a subset of llm tests into their own group", -] \ No newline at end of file +] diff --git a/uv.lock b/uv.lock index a380fe6..690e12d 100644 --- a/uv.lock +++ b/uv.lock @@ -184,15 +184,15 @@ wheels = [ [[package]] name = "httpcore" -version = "1.0.5" +version = "1.0.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "certifi" }, { name = "h11" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/17/b0/5e8b8674f8d203335a62fdfcfa0d11ebe09e23613c3391033cbba35f7926/httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61", size = 83234 } +sdist = { url = "https://files.pythonhosted.org/packages/b6/44/ed0fa6a17845fb033bd885c03e842f08c1b9406c86a2e60ac1ae1b9206a6/httpcore-1.0.6.tar.gz", hash = 
"sha256:73f6dbd6eb8c21bbf7ef8efad555481853f5f6acdeaff1edb0694289269ee17f", size = 85180 } wheels = [ - { url = "https://files.pythonhosted.org/packages/78/d4/e5d7e4f2174f8a4d63c8897d79eb8fe2503f7ecc03282fee1fa2719c2704/httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5", size = 77926 }, + { url = "https://files.pythonhosted.org/packages/06/89/b161908e2f51be56568184aeb4a880fd287178d176fd1c860d2217f41106/httpcore-1.0.6-py3-none-any.whl", hash = "sha256:27b59625743b85577a8c0e10e55b50b5368a4f2cfe8cc7bcfa9cf00829c2682f", size = 78011 }, ] [[package]] @@ -328,16 +328,16 @@ dev = [ [package.metadata] requires-dist = [ - { name = "av" }, + { name = "av", specifier = ">=13.0.0" }, { name = "llm", specifier = ">=0.16" }, { name = "numpy", specifier = ">=2.1.1" }, - { name = "pandas" }, - { name = "pillow" }, - { name = "protobuf" }, - { name = "pytesseract" }, - { name = "soundfile" }, - { name = "timm" }, - { name = "torch" }, + { name = "pandas", specifier = ">=2.2.3" }, + { name = "pillow", specifier = ">=10.4.0" }, + { name = "protobuf", specifier = ">=5.28.2" }, + { name = "pytesseract", specifier = ">=0.3.13" }, + { name = "soundfile", specifier = ">=0.12.1" }, + { name = "timm", specifier = ">=1.0.9" }, + { name = "torch", specifier = ">=2.4.1" }, { name = "transformers", specifier = ">=4.45.1" }, ] @@ -513,11 +513,11 @@ wheels = [ [[package]] name = "nvidia-nvjitlink-cu12" -version = "12.6.68" +version = "12.6.77" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/58/8c/69c9e39cd6bfa813852a94e9bd3c075045e2707d163e9dc2326c82d2c330/nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_aarch64.whl", hash = "sha256:b3fd0779845f68b92063ab1393abab1ed0a23412fc520df79a8190d098b5cd6b", size = 19253287 }, - { url = 
"https://files.pythonhosted.org/packages/a8/48/a9775d377cb95585fb188b469387f58ba6738e268de22eae2ad4cedb2c41/nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_x86_64.whl", hash = "sha256:125a6c2a44e96386dda634e13d944e60b07a0402d391a070e8fb4104b34ea1ab", size = 19725597 }, + { url = "https://files.pythonhosted.org/packages/11/8c/386018fdffdce2ff8d43fedf192ef7d14cab7501cbf78a106dd2e9f1fc1f/nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:3bf10d85bb1801e9c894c6e197e44dd137d2a0a9e43f8450e9ad13f2df0dd52d", size = 19270432 }, + { url = "https://files.pythonhosted.org/packages/fe/e4/486de766851d58699bcfeb3ba6a3beb4d89c3809f75b9d423b9508a8760f/nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9ae346d16203ae4ea513be416495167a0101d33d2d14935aa9c1829a3fb45142", size = 19745114 }, ] [[package]] @@ -530,7 +530,7 @@ wheels = [ [[package]] name = "openai" -version = "1.50.2" +version = "1.51.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -542,9 +542,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/01/ea/eb091dd1f567860d44ee4c514bf53f6ea07b439f4679e9a9fd6e6496009c/openai-1.50.2.tar.gz", hash = "sha256:3987ae027152fc8bea745d60b02c8f4c4a76e1b5c70e73565fa556db6f78c9e6", size = 306004 } +sdist = { url = "https://files.pythonhosted.org/packages/28/af/cc59b1447f5a02bb1f25b9b0cd94b607aa2c969a81d9a244d4067f91f6fe/openai-1.51.0.tar.gz", hash = "sha256:8dc4f9d75ccdd5466fc8c99a952186eddceb9fd6ba694044773f3736a847149d", size = 306880 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/49/7baf2537d7eedb162c8212e973bd69fe7233b463118e469bdb1bb07506f6/openai-1.50.2-py3-none-any.whl", hash = "sha256:822dd2051baa3393d0d5406990611975dd6f533020dc9375a34d4fe67e8b75f7", size = 382968 }, + { url = 
"https://files.pythonhosted.org/packages/c7/08/9f22356d4fbd273f734db1e6663b7ca6987943080567f5580471022e57ca/openai-1.51.0-py3-none-any.whl", hash = "sha256:d9affafb7e51e5a27dce78589d4964ce4d6f6d560307265933a94b2e3f3c5d2c", size = 383533 }, ] [[package]] @@ -1140,7 +1140,7 @@ name = "triton" version = "3.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "filelock" }, + { name = "filelock", marker = "python_full_version < '3.13'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/fe/7b/7757205dee3628f75e7991021d15cd1bd0c9b044ca9affe99b50879fc0e1/triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb", size = 209464695 },