diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index b26513b..c5cd672 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -8,44 +8,29 @@ permissions: contents: read jobs: - test: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: pip - cache-dependency-path: pyproject.toml - - name: Install dependencies - run: | - pip install -e '.[test]' - - name: Run tests - run: | - python -m pytest + call-test: + uses: ./.github/workflows/test.yml deploy: runs-on: ubuntu-latest - needs: [test] + needs: [call-test] environment: release permissions: id-token: write steps: - uses: actions/checkout@v4 - - name: Set up Python + - name: Install uv + uses: astral-sh/setup-uv@v2 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + - name: Set up Python 3.12 uses: actions/setup-python@v5 with: python-version: "3.12" - cache: pip - cache-dependency-path: pyproject.toml - name: Install dependencies - run: | - pip install setuptools wheel build + run: uv sync --all-extras --dev --python 3.12 --python-preference only-system - name: Build run: | - python -m build + uv build - name: Publish uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6a25b10..f3ba704 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,6 +1,6 @@ name: Test -on: [push, pull_request] +on: [push, pull_request, workflow_call] permissions: contents: read diff --git a/README.md b/README.md index 7c254c7..29b19dd 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ 
[![Tests](https://github.com/rectalogic/llm-transformers/actions/workflows/test.yml/badge.svg)](https://github.com/rectalogic/llm-transformers/actions/workflows/test.yml) [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/rectalogic/llm-transformers/blob/main/LICENSE) -Plugin for llm adding support for [🤗 Hugging Face Transformers](https://huggingface.co/docs/transformers/index) +Plugin for [llm](https://llm.datasette.io/) adding support for 🤗 [Hugging Face Transformers](https://huggingface.co/docs/transformers/index) [pipeline](https://huggingface.co/docs/transformers/pipeline_tutorial) tasks. ## Installation @@ -13,12 +13,25 @@ Install this plugin in the same environment as [LLM](https://llm.datasette.io/). ```bash llm install llm-transformers ``` +Some pipelines that accept audio/video inputs require the [ffmpeg](https://ffmpeg.org/) executable to be installed. +The [`document-question-answering`](#document-question-answering) pipeline uses `pytesseract` which requires the [tesseract](https://tesseract-ocr.github.io/) executable. + ## Usage -XXX document `-o verbose True` -XXX HF_TOKEN/key usage +This plugin exposes 🤗 Hugging Face transformers pipelines, the "model" name is `transformers` and the pipeline task and/or Hugging Face model are specified as model options, e.g.: +```sh-session +$ llm -m transformers -o task text-generation "A dog has" +$ llm -m transformers -o model facebook/musicgen-small "techno music" +``` +If only `-o task <task>` is specified, the default model for that task will be used. +If only `-o model <model>` is specified, the task will be inferred from the model. +If both are specified, then the model must be compatible with the task. + +Transformers logging is verbose and disabled by default. +Specify the `-o verbose True` model option to enable it.
-Most models are freely accessible, some of them require accepting a license agreement and using a Hugging Face [API token](https://huggingface.co/settings/tokens) that has access to the model. +Most 🤗 Hugging Face models are freely accessible, some of them require accepting a license agreement +and using a Hugging Face [API token](https://huggingface.co/settings/tokens) that has access to the model. You can use `llm keys set huggingface`, or set the `HF_TOKEN` env var, or use the `--key` option to `llm`. ```sh-session @@ -29,6 +42,15 @@ $ llm --key hf_******************** -m transformers -o model meta-llama/Llama-3. A dog has been named as the killer of a woman who was found dead in her home. ``` +Some pipelines generate binary (audio, image, video) output, these are written to a temporary file +and the path to the file is returned. +A specific file can be specified with the `-o output <path>` model option. +The suffix specifies the file type (e.g. `.png` vs `.jpg` etc). + +Pipelines can be tuned by passing additional keyword arguments to the pipeline call. +These are specified as a JSON string in the `-o kwargs '<json>'` model option. +See the documentation for a specific pipeline for information on additional keyword arguments. + ## Transformer Pipeline Tasks ### [audio-classification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.AudioClassificationPipeline) @@ -62,7 +84,6 @@ XXX embed image here? ### [document-question-answering](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.DocumentQuestionAnsweringPipeline) The `document-question-answering` task requires a `context` option which is a file or URL to an image: - ```sh-session $ llm -m transformers -o task document-question-answering -o context https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png "What is the invoice number?" us-001 @@ -74,7 +95,6 @@ Not supported.
### [fill-mask](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.FillMaskPipeline) `fill-mask` requires a placeholder in the prompt, thiis is typically `` but is different for different models: - ```sh-session $ llm -m transformers -o task fill-mask "My is about to explode" My brain is about to explode (score=0.09140042215585709) @@ -174,7 +194,6 @@ $ llm -m transformers -o task summarization -o kwargs '{"min_length": 2, "max_le ### [table-question-answering](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TableQuestionAnsweringPipeline) `table-question-answering` takes a required `context` option - a path to a CSV file. - ```sh-session $ cat < /tmp/t.csv > Repository,Stars,Contributors,Programming language @@ -205,7 +224,6 @@ POSITIVE (0.9997681975364685) ### [text-generation](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TextGenerationPipeline) Some `text-generation` models can be chatted with. - ```sh-session $ llm -m transformers -o task text-generation "I am going to elect" I am going to elect the president of Mexico and that president should vote for our president," he said. "That's not very popular. That's not the American way. I would not want voters to accept the fact that that guy's running a @@ -224,12 +242,14 @@ Your question was: "What is the capital of France?" ### [text-to-audio](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TextToAudioPipeline) -`text-to-audio` generates audio, the response is the path to the audio file. +`text-to-audio` generates audio, the response is the path to the generated audio file. 
```sh-session $ llm -m transformers -o kwargs '{"generate_kwargs": {"max_new_tokens": 100}}' -o model facebook/musicgen-small "techno music" /var/folders/b1/1j9kkk053txc5krqbh0lj5t00000gn/T/tmpoueh05y6.wav $ llm -m transformers -o task text-to-audio "Hello world" /var/folders/b1/1j9kkk053txc5krqbh0lj5t00000gn/T/tmpmpwhkd8p.wav +$ llm -m transformers -o task text-to-audio -o model facebook/mms-tts-eng -o output /tmp/speech.flac "Hello world" +/tmp/speech.flac ``` ### [token-classification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TokenClassificationPipeline) @@ -243,7 +263,6 @@ London (I-LOC: 0.998397171497345) ### [translation_xx_to_yy](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TranslationPipeline) Substitute the from and to language codes into the task name, e.g. from `en` to `fr` would use task `translation_en_to_fr`: - ```sh-session $ llm -m transformers -o task translation_en_to_fr "How old are you?" quel âge êtes-vous? @@ -252,7 +271,6 @@ $ llm -m transformers -o task translation_en_to_fr "How old are you?" 
### [video-classification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.VideoClassificationPipeline) `video-classification` task expects a video path or URL as the prompt: - ```sh-session $ llm -m transformers -o task video-classification https://huggingface.co/datasets/Xuehai/MMWorld/resolve/main/Amazing%20street%20dance%20performance%20from%20Futunity%20UK%20-%20Move%20It%202013/Amazing%20street%20dance%20performance%20from%20Futunity%20UK%20-%20Move%20It%202013.mp4 dancing ballet (0.006608937866985798) @@ -265,7 +283,6 @@ punching bag (0.00565463537350297) ### [visual-question-answering](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.VisualQuestionAnsweringPipeline) `visual-question-answering` task requires an `context` option - a file or URL to an image: - ```sh-session $ llm -m transformers -o task visual-question-answering -o context https://huggingface.co/datasets/Narsil/image_dummy/raw/main/lena.png "What is she wearing?" hat (0.9480269551277161) @@ -278,7 +295,6 @@ nothing (0.0020962499547749758) ### [zero-shot-classification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.ZeroShotClassificationPipeline) `zero-shot-classification` requires a comma separated list of labels to be specified in the `context` model option: - ```sh-session $ llm -m transformers -o task zero-shot-classification -o context "urgent,not urgent,phone,tablet,computer" "I have a problem with my iphone that needs to be resolved asap!!" urgent (0.5036348700523376) @@ -291,7 +307,6 @@ tablet (0.0023087668232619762) ### [zero-shot-image-classification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.ZeroShotImageClassificationPipeline) `zero-shot-image-classification` requires a comma separated list of labels to be specified in the `context` model option. 
The prompt is a path or URL to an image: - ```sh-session $ llm -m transformers -o task zero-shot-image-classification -o context "black and white,photorealist,painting" https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png black and white (0.9736384749412537) @@ -301,8 +316,8 @@ painting (0.004946451168507338) ### [zero-shot-audio-classification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.ZeroShotAudioClassificationPipeline) -`zero-shot-audio-classification` requires a comma separated list of labels to be specified in the `context` model option. The prompt is a path or URL to an audio: - +`zero-shot-audio-classification` requires a comma separated list of labels to be specified in the `context` model option. +The prompt is a path or URL to an audio: ```sh-session $ llm -m transformers -o task zero-shot-audio-classification -o context "Sound of a bird,Sound of a dog" https://huggingface.co/datasets/s3prl/Nonspeech/resolve/main/animal_sound/n52.wav Sound of a bird (0.9998763799667358) @@ -313,7 +328,6 @@ Sound of a dog (0.00012355657236184925) `zero-shot-object-detection` requires a comma separated list of labels to be specified in the `context` model option. The prompt is a path or URL to an image. The response is JSON and includes a bounding box for each label: - ```sh-session $ llm -m transformers -o task zero-shot-object-detection -o context "cat,couch" http://images.cocodataset.org/val2017/000000039769.jpg [ @@ -350,20 +364,13 @@ $ llm -m transformers -o task zero-shot-object-detection -o context "cat,couch" ] ``` - ## Development -To set up this plugin locally, first checkout the code. 
Then create a new virtual environment: -```bash -cd llm-transformers -python -m venv venv -source venv/bin/activate -``` -Now install the dependencies and test dependencies: -```bash -llm install -e '.[test]' -``` -To run the tests: -```bash -python -m pytest -``` +To set up this plugin locally, first checkout the code and install [`uv`](https://docs.astral.sh/uv/). +`uv sync` to create a `venv` and install, then run tests: +```sh-session +$ uv sync --dev +$ uv run pytest +$ uv run ruff check +$ uv run ruff format --check +``` \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 4df81d2..4909dcc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,14 +14,14 @@ classifiers = [ dependencies = [ "llm>=0.16", "transformers>=4.45.1", - "torch", - "soundfile", - "pillow", - "pytesseract", - "timm", - "protobuf", - "pandas", - "av", + "torch>=2.4.1", + "soundfile>=0.12.1", + "pillow>=10.4.0", + "pytesseract>=0.3.13", + "timm>=1.0.9", + "protobuf>=5.28.2", + "pandas>=2.2.3", + "av>=13.0.0", "numpy>=2.1.1", ] @@ -84,4 +84,4 @@ markers = [ "llm1: marks a subset of llm tests into their own group", "llm2: marks a subset of llm tests into their own group", "llm3: marks a subset of llm tests into their own group", -] \ No newline at end of file +] diff --git a/uv.lock b/uv.lock index a380fe6..690e12d 100644 --- a/uv.lock +++ b/uv.lock @@ -184,15 +184,15 @@ wheels = [ [[package]] name = "httpcore" -version = "1.0.5" +version = "1.0.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "certifi" }, { name = "h11" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/17/b0/5e8b8674f8d203335a62fdfcfa0d11ebe09e23613c3391033cbba35f7926/httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61", size = 83234 } +sdist = { url = "https://files.pythonhosted.org/packages/b6/44/ed0fa6a17845fb033bd885c03e842f08c1b9406c86a2e60ac1ae1b9206a6/httpcore-1.0.6.tar.gz", hash = 
"sha256:73f6dbd6eb8c21bbf7ef8efad555481853f5f6acdeaff1edb0694289269ee17f", size = 85180 } wheels = [ - { url = "https://files.pythonhosted.org/packages/78/d4/e5d7e4f2174f8a4d63c8897d79eb8fe2503f7ecc03282fee1fa2719c2704/httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5", size = 77926 }, + { url = "https://files.pythonhosted.org/packages/06/89/b161908e2f51be56568184aeb4a880fd287178d176fd1c860d2217f41106/httpcore-1.0.6-py3-none-any.whl", hash = "sha256:27b59625743b85577a8c0e10e55b50b5368a4f2cfe8cc7bcfa9cf00829c2682f", size = 78011 }, ] [[package]] @@ -328,16 +328,16 @@ dev = [ [package.metadata] requires-dist = [ - { name = "av" }, + { name = "av", specifier = ">=13.0.0" }, { name = "llm", specifier = ">=0.16" }, { name = "numpy", specifier = ">=2.1.1" }, - { name = "pandas" }, - { name = "pillow" }, - { name = "protobuf" }, - { name = "pytesseract" }, - { name = "soundfile" }, - { name = "timm" }, - { name = "torch" }, + { name = "pandas", specifier = ">=2.2.3" }, + { name = "pillow", specifier = ">=10.4.0" }, + { name = "protobuf", specifier = ">=5.28.2" }, + { name = "pytesseract", specifier = ">=0.3.13" }, + { name = "soundfile", specifier = ">=0.12.1" }, + { name = "timm", specifier = ">=1.0.9" }, + { name = "torch", specifier = ">=2.4.1" }, { name = "transformers", specifier = ">=4.45.1" }, ] @@ -513,11 +513,11 @@ wheels = [ [[package]] name = "nvidia-nvjitlink-cu12" -version = "12.6.68" +version = "12.6.77" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/58/8c/69c9e39cd6bfa813852a94e9bd3c075045e2707d163e9dc2326c82d2c330/nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_aarch64.whl", hash = "sha256:b3fd0779845f68b92063ab1393abab1ed0a23412fc520df79a8190d098b5cd6b", size = 19253287 }, - { url = 
"https://files.pythonhosted.org/packages/a8/48/a9775d377cb95585fb188b469387f58ba6738e268de22eae2ad4cedb2c41/nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_x86_64.whl", hash = "sha256:125a6c2a44e96386dda634e13d944e60b07a0402d391a070e8fb4104b34ea1ab", size = 19725597 }, + { url = "https://files.pythonhosted.org/packages/11/8c/386018fdffdce2ff8d43fedf192ef7d14cab7501cbf78a106dd2e9f1fc1f/nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:3bf10d85bb1801e9c894c6e197e44dd137d2a0a9e43f8450e9ad13f2df0dd52d", size = 19270432 }, + { url = "https://files.pythonhosted.org/packages/fe/e4/486de766851d58699bcfeb3ba6a3beb4d89c3809f75b9d423b9508a8760f/nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9ae346d16203ae4ea513be416495167a0101d33d2d14935aa9c1829a3fb45142", size = 19745114 }, ] [[package]] @@ -530,7 +530,7 @@ wheels = [ [[package]] name = "openai" -version = "1.50.2" +version = "1.51.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -542,9 +542,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/01/ea/eb091dd1f567860d44ee4c514bf53f6ea07b439f4679e9a9fd6e6496009c/openai-1.50.2.tar.gz", hash = "sha256:3987ae027152fc8bea745d60b02c8f4c4a76e1b5c70e73565fa556db6f78c9e6", size = 306004 } +sdist = { url = "https://files.pythonhosted.org/packages/28/af/cc59b1447f5a02bb1f25b9b0cd94b607aa2c969a81d9a244d4067f91f6fe/openai-1.51.0.tar.gz", hash = "sha256:8dc4f9d75ccdd5466fc8c99a952186eddceb9fd6ba694044773f3736a847149d", size = 306880 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/49/7baf2537d7eedb162c8212e973bd69fe7233b463118e469bdb1bb07506f6/openai-1.50.2-py3-none-any.whl", hash = "sha256:822dd2051baa3393d0d5406990611975dd6f533020dc9375a34d4fe67e8b75f7", size = 382968 }, + { url = 
"https://files.pythonhosted.org/packages/c7/08/9f22356d4fbd273f734db1e6663b7ca6987943080567f5580471022e57ca/openai-1.51.0-py3-none-any.whl", hash = "sha256:d9affafb7e51e5a27dce78589d4964ce4d6f6d560307265933a94b2e3f3c5d2c", size = 383533 }, ] [[package]] @@ -1140,7 +1140,7 @@ name = "triton" version = "3.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "filelock" }, + { name = "filelock", marker = "python_full_version < '3.13'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/fe/7b/7757205dee3628f75e7991021d15cd1bd0c9b044ca9affe99b50879fc0e1/triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb", size = 209464695 },