From 4167290beea3eb6e40b4046a45cbe531e778c6dd Mon Sep 17 00:00:00 2001
From: David Dickinson
Date: Fri, 12 Apr 2024 01:01:45 -0700
Subject: [PATCH 1/2] Have 'terraform apply' successfully deploying label
 studio to huggingface spaces in the 'gcp-modular' directory - created a
 'label-studio-hf-module'. Made changes to add options in constants and enums
 for the new label studio component, and updated logic to allow 'huggingface'
 to be accepted as a provider at the component level

---
 Reviewer notes (text in this region is ignored by `git am` / `git apply`):

 * Line structure was restored from a whitespace-collapsed copy. Indentation
   of context lines is reconstructed by convention; verify it against the
   target tree before applying. The `index` blob hashes are the original
   ones and no longer describe the edited post-images.
 * constants.py: the "annotator" entries under "aws" and "gcp" now list the
   flavor "label_studio" (the inner lists hold flavors, as every sibling
   entry does) instead of the provider name "huggingface". The
   whitespace-only remove/re-add of the "artifact_store": ["s3"] line was
   dropped, so the diffstat for this file changed.
 * gcp-modular/label_studio_hf.tf: `token_hash` is guarded so it no longer
   indexes module.label_studio_hf[0] when the module count is 0.
 * gcp-modular/variables.tf: `huggingface_token` is marked sensitive.
 * label-studio-hf-module: `private`, `template` and `storage` read the
   module variables that already exist for them (defaults are unchanged);
   the timestamp()-derived `name` is ignored after creation; the token
   passthrough output is marked sensitive; the personal e-mail address and
   the fixed password were removed as variable defaults.
 * yaml_utils.py: the provider check is written as a membership test
   (same truth table, fits the line-length limit).
 * Open question, left unchanged: `enable_annotator` defaults to true while
   the neighbouring enable_* toggles default to false. Confirm whether that
   is intended.

 src/mlstacks/constants.py                     |  7 ++
 src/mlstacks/enums.py                         |  2 +
 .../terraform/gcp-modular/label_studio_hf.tf  | 27 ++++++++
 .../terraform/gcp-modular/terraform.tf        |  7 +-
 .../terraform/gcp-modular/variables.tf        | 14 ++++
 .../label-studio-hf-module/label-studio.tf    | 13 ++++
 .../modules/label-studio-hf-module/outputs.tf | 41 +++++++++++
 .../label-studio-hf-module/providers.tf       |  9 +++
 .../label-studio-hf-module/variables.tf       | 69 +++++++++++++++++++
 src/mlstacks/utils/yaml_utils.py              |  2 +-
 10 files changed, 189 insertions(+), 2 deletions(-)
 create mode 100644 src/mlstacks/terraform/gcp-modular/label_studio_hf.tf
 create mode 100644 src/mlstacks/terraform/modules/label-studio-hf-module/label-studio.tf
 create mode 100644 src/mlstacks/terraform/modules/label-studio-hf-module/outputs.tf
 create mode 100644 src/mlstacks/terraform/modules/label-studio-hf-module/providers.tf
 create mode 100644 src/mlstacks/terraform/modules/label-studio-hf-module/variables.tf

diff --git a/src/mlstacks/constants.py b/src/mlstacks/constants.py
index 3b88745b..44863f7c 100644
--- a/src/mlstacks/constants.py
+++ b/src/mlstacks/constants.py
@@ -17,6 +17,7 @@
 MLSTACKS_PACKAGE_NAME = "mlstacks"
 MLSTACKS_INITIALIZATION_FILE_FLAG = "IGNORE_ME"
 MLSTACKS_STACK_COMPONENT_FLAGS = [
+    "annotator",
     "artifact_store",
     "container_registry",
     "experiment_tracker",  # takes flavor
@@ -26,6 +27,7 @@
     "step_operator",  # takes flavor
 ]
 ALLOWED_FLAVORS = {
+    "annotator": ["label_studio"],
     "artifact_store": ["s3", "gcp", "minio"],
     "container_registry": ["gcp", "aws", "default"],
     "experiment_tracker": ["mlflow"],
@@ -43,7 +45,8 @@
 }
 ALLOWED_COMPONENT_TYPES: Dict[str, Dict[str, List[str]]] = {
     "aws": {
+        "annotator": ["label_studio"],
         "artifact_store": ["s3"],
         "container_registry": ["aws"],
         "experiment_tracker": ["mlflow"],
         "orchestrator": [
@@ -59,6 +62,7 @@
     },
     "azure": {},
     "gcp": {
+        "annotator": ["label_studio"],
         "artifact_store": ["gcp"],
         "container_registry": ["gcp"],
         "experiment_tracker": ["mlflow"],
@@ -73,6 +77,9 @@
         "model_deployer": ["seldon"],
         "step_operator": ["vertex"],
     },
+    "huggingface": {
+        "annotator": ["label_studio"],
+    },
     "k3d": {
         "artifact_store": ["minio"],
         "container_registry": ["default"],
diff --git a/src/mlstacks/enums.py b/src/mlstacks/enums.py
index 122e2806..d5fd8934 100644
--- a/src/mlstacks/enums.py
+++ b/src/mlstacks/enums.py
@@ -40,6 +40,7 @@ class ComponentFlavorEnum(str, Enum):
     GCP = "gcp"
     KUBEFLOW = "kubeflow"
     KUBERNETES = "kubernetes"
+    LABEL_STUDIO = "label_studio"
     MINIO = "minio"
     MLFLOW = "mlflow"
     S3 = "s3"
@@ -65,6 +66,7 @@ class ProviderEnum(str, Enum):
     AZURE = "azure"
     GCP = "gcp"
     K3D = "k3d"
+    HUGGINGFACE = "huggingface"
 
 
 class AnalyticsEventsEnum(str, Enum):
diff --git a/src/mlstacks/terraform/gcp-modular/label_studio_hf.tf b/src/mlstacks/terraform/gcp-modular/label_studio_hf.tf
new file mode 100644
index 00000000..708db339
--- /dev/null
+++ b/src/mlstacks/terraform/gcp-modular/label_studio_hf.tf
@@ -0,0 +1,27 @@
+provider "huggingface-spaces" {
+  # alias = "strickvl"
+  token = var.huggingface_token
+}
+
+# using the labelstudio huggingface module to create a label studio space on huggingface
+
+module "label_studio_hf" {
+  source = "../modules/label-studio-hf-module"
+  # providers = {
+  #   huggingface-spaces.strickvl = huggingface-spaces.strickvl
+  # }
+  count             = var.enable_annotator ? 1 : 0
+  huggingface_token = var.huggingface_token
+  enable_annotator  = var.enable_annotator
+}
+
+output "module_huggingface_token" {
+  value     = length(module.label_studio_hf) > 0 ? module.label_studio_hf[0].huggingface_token_passed : ""
+  sensitive = true
+}
+
+output "token_hash" {
+  value     = length(module.label_studio_hf) > 0 ? sha256(module.label_studio_hf[0].huggingface_token_passed) : ""
+  sensitive = true
+}
+
diff --git a/src/mlstacks/terraform/gcp-modular/terraform.tf b/src/mlstacks/terraform/gcp-modular/terraform.tf
index 899c66e6..d89cb0f7 100644
--- a/src/mlstacks/terraform/gcp-modular/terraform.tf
+++ b/src/mlstacks/terraform/gcp-modular/terraform.tf
@@ -29,10 +29,15 @@ terraform {
       source  = "loafoe/htpasswd"
       version = "1.0.3"
     }
+
+    huggingface-spaces = {
+      source  = "strickvl/huggingface-spaces"
+      version = ">= 0.0.4" # Specify the version or version constraint here
+    }
   }
 
   backend "local" {
-    config = {}
+    # config = {}
   }
 
   required_version = ">= 0.14.8"
diff --git a/src/mlstacks/terraform/gcp-modular/variables.tf b/src/mlstacks/terraform/gcp-modular/variables.tf
index 74d14e6f..c99543bf 100644
--- a/src/mlstacks/terraform/gcp-modular/variables.tf
+++ b/src/mlstacks/terraform/gcp-modular/variables.tf
@@ -27,6 +27,20 @@ variable "enable_experiment_tracker_mlflow" {
   description = "Enable MLflow deployment"
   default     = false
 }
+
+variable "enable_annotator" {
+  description = "Enable Label Studio deployment"
+  type        = bool
+  default     = true
+}
+
+variable "huggingface_token" {
+  description = "Huggingface token"
+  type        = string
+  sensitive   = true
+  default     = ""
+}
+
 variable "enable_model_deployer_seldon" {
   description = "Enable Seldon deployment"
   default     = false
diff --git a/src/mlstacks/terraform/modules/label-studio-hf-module/label-studio.tf b/src/mlstacks/terraform/modules/label-studio-hf-module/label-studio.tf
new file mode 100644
index 00000000..56ac7cbb
--- /dev/null
+++ b/src/mlstacks/terraform/modules/label-studio-hf-module/label-studio.tf
@@ -0,0 +1,13 @@
+resource "huggingface-spaces_space" "ls_space" {
+  provider = huggingface-spaces
+  # timestamp() is re-evaluated on every plan; the name is only meant to be stamped at creation.
+  name     = "label-studio-hf-module-${formatdate("YYYYMMDD", timestamp())}"
+  private  = var.label_studio_private
+  sdk      = "docker"
+  template = var.label_studio_template
+
+  hardware   = var.label_studio_hardware
+  sleep_time = var.label_studio_sleep_time
+  storage    = var.persistent_storage_size
+  lifecycle { ignore_changes = [name] }
+}
diff --git a/src/mlstacks/terraform/modules/label-studio-hf-module/outputs.tf b/src/mlstacks/terraform/modules/label-studio-hf-module/outputs.tf
new file mode 100644
index 00000000..0e2505d0
--- /dev/null
+++ b/src/mlstacks/terraform/modules/label-studio-hf-module/outputs.tf
@@ -0,0 +1,41 @@
+data "huggingface-spaces_space" "ls_space_data" {
+  id = huggingface-spaces_space.ls_space.id
+}
+
+output "label_studio_id" {
+  value = huggingface-spaces_space.ls_space.id
+}
+
+output "label_studio_name" {
+  value = data.huggingface-spaces_space.ls_space_data.name
+}
+
+output "label_studio_author" {
+  value = data.huggingface-spaces_space.ls_space_data.author
+}
+
+output "label_studio_last_modified" {
+  value = data.huggingface-spaces_space.ls_space_data.last_modified
+}
+
+output "label_studio_likes" {
+  value = data.huggingface-spaces_space.ls_space_data.likes
+}
+
+output "label_studio_private" {
+  value = data.huggingface-spaces_space.ls_space_data.private
+}
+
+output "label_studio_sdk" {
+  value = data.huggingface-spaces_space.ls_space_data.sdk
+}
+
+output "label_studio_hardware" {
+  value = data.huggingface-spaces_space.ls_space_data.hardware
+}
+
+output "huggingface_token_passed" {
+  value     = var.huggingface_token
+  sensitive = true
+}
+
diff --git a/src/mlstacks/terraform/modules/label-studio-hf-module/providers.tf b/src/mlstacks/terraform/modules/label-studio-hf-module/providers.tf
new file mode 100644
index 00000000..bef76ff3
--- /dev/null
+++ b/src/mlstacks/terraform/modules/label-studio-hf-module/providers.tf
@@ -0,0 +1,9 @@
+terraform {
+  required_providers {
+    huggingface-spaces = {
+      source  = "strickvl/huggingface-spaces"
+      version = ">= 0.0.4"
+      # configuration_aliases = [huggingface-spaces.strickvl]
+    }
+  }
+}
diff --git a/src/mlstacks/terraform/modules/label-studio-hf-module/variables.tf b/src/mlstacks/terraform/modules/label-studio-hf-module/variables.tf
new file mode 100644
index 00000000..959a2453
--- /dev/null
+++ b/src/mlstacks/terraform/modules/label-studio-hf-module/variables.tf
@@ -0,0 +1,69 @@
+variable "huggingface_token" {
+  type        = string
+  description = "The Hugging Face API token."
+  sensitive   = true
+}
+
+variable "enable_annotator" {
+  type        = bool
+  description = "Enable annotator for the Label Studio instance."
+  default     = false
+}
+
+variable "enable_persistent_storage" {
+  type        = bool
+  description = "Enable persistent storage for the Label Studio instance."
+  default     = false
+}
+
+variable "persistent_storage_size" {
+  type        = string
+  description = "The size of the persistent storage for the Label Studio instance."
+  default     = "small"
+}
+
+variable "label_studio_disable_signup_without_link" {
+  type        = bool
+  description = "Disable the signup without link for the Label Studio instance."
+  default     = false
+}
+
+variable "label_studio_username" {
+  type        = string
+  description = "The username for the Label Studio instance."
+  default     = null
+  sensitive   = true
+}
+
+variable "label_studio_password" {
+  type        = string
+  description = "The password for the Label Studio instance."
+  default     = null
+  sensitive   = true
+}
+
+variable "label_studio_hardware" {
+  type        = string
+  description = "The hardware for the Label Studio instance."
+  default     = "cpu-basic"
+}
+
+variable "label_studio_template" {
+  type        = string
+  description = "The template for the Label Studio instance."
+  default     = "LabelStudio/LabelStudio"
+}
+
+variable "label_studio_sleep_time" {
+  type        = string
+  description = "The sleep time in seconds for the Label Studio instance. (gc_timeout)"
+  default     = "3600"
+}
+
+variable "label_studio_private" {
+  type        = bool
+  description = "The private flag for the Label Studio instance."
+  default     = false
+}
+
+
diff --git a/src/mlstacks/utils/yaml_utils.py b/src/mlstacks/utils/yaml_utils.py
index 0ef0734a..246fef6f 100644
--- a/src/mlstacks/utils/yaml_utils.py
+++ b/src/mlstacks/utils/yaml_utils.py
@@ -122,7 +122,7 @@ def load_stack_yaml(path: str) -> Stack:
     )
 
     for component in stack.components:
-        if component.provider != stack.provider:
+        if component.provider not in (stack.provider, "huggingface"):
             raise ValueError(STACK_COMPONENT_PROVIDER_MISMATCH_ERROR_MESSAGE)
 
     return stack

From 5d5b6f5755e612a61e7db7b94131a47d89451180 Mon Sep 17 00:00:00 2001
From: David Dickinson
Date: Fri, 12 Apr 2024 01:25:16 -0700
Subject: [PATCH 2/2] Added the yaml files to a new /testing directory that
 I'm using to test creation of the new label studio component

---
 Reviewer notes (ignored by `git am` / `git apply`): line structure and YAML
 nesting were reconstructed from a whitespace-collapsed copy; the nesting
 under `metadata` is by convention and should be confirmed.
 simple_component_gcs.yaml now ends with a newline.

 src/mlstacks/testing/label_studio_stack.yaml   | 11 +++++++++++
 src/mlstacks/testing/simple_component_gcs.yaml | 13 +++++++++++++
 .../testing/simple_component_label_studio.yaml | 13 +++++++++++++
 3 files changed, 37 insertions(+)
 create mode 100644 src/mlstacks/testing/label_studio_stack.yaml
 create mode 100644 src/mlstacks/testing/simple_component_gcs.yaml
 create mode 100644 src/mlstacks/testing/simple_component_label_studio.yaml

diff --git a/src/mlstacks/testing/label_studio_stack.yaml b/src/mlstacks/testing/label_studio_stack.yaml
new file mode 100644
index 00000000..b24f5874
--- /dev/null
+++ b/src/mlstacks/testing/label_studio_stack.yaml
@@ -0,0 +1,11 @@
+spec_version: 1
+spec_type: stack
+name: "quickstart_stack"
+provider: gcp
+default_region: "us-west2"
+default_tags:
+  deployed-by: "mlstacks"
+components:
+  - simple_component_label_studio.yaml
+  - simple_component_gcs.yaml
+
diff --git a/src/mlstacks/testing/simple_component_gcs.yaml b/src/mlstacks/testing/simple_component_gcs.yaml
new file mode 100644
index 00000000..23ee5e9e
--- /dev/null
+++ b/src/mlstacks/testing/simple_component_gcs.yaml
@@ -0,0 +1,13 @@
+spec_version: 1
+spec_type: component
+component_type: "artifact_store"
+component_flavor: "gcp"
+name: "ls_stack_gcs_bucket"
+provider: gcp
+metadata:
+  config:
+    bucket_name: "ls_stack_gcs_bucket"
+    project_id: "supple-snow-244700"
+  tags:
+    deployed-by: "mlstacks"
+  region: "us-west2"
diff --git a/src/mlstacks/testing/simple_component_label_studio.yaml b/src/mlstacks/testing/simple_component_label_studio.yaml
new file mode 100644
index 00000000..b765a69d
--- /dev/null
+++ b/src/mlstacks/testing/simple_component_label_studio.yaml
@@ -0,0 +1,13 @@
+spec_version: 1
+spec_type: component
+component_type: "annotator"
+component_flavor: "label_studio"
+name: "quickstart_label_studio"
+provider: "huggingface"
+metadata:
+  config:
+    project_id: "supple-snow-244700"
+  tags:
+    deployed-by: "mlstacks"
+
+