Merge pull request #888 from roboflow/joao/gaze-detection-block
Gaze detection Workflow block
PawelPeczek-Roboflow authored Dec 20, 2024
2 parents 6c5e4fa + 8358782 commit bbd3719
Showing 17 changed files with 612 additions and 23 deletions.
@@ -1,8 +1,6 @@
name: INTEGRATION TESTS - inference CLI + inference CORE

on:
-  pull_request:
-    branches: [main]
  push:
    branches: [main]
  workflow_dispatch:
2 changes: 0 additions & 2 deletions .github/workflows/integration_tests_inference_cli_x86.yml
@@ -1,8 +1,6 @@
name: INTEGRATION TESTS - inference CLI

on:
-  pull_request:
-    branches: [main]
  push:
    branches: [main]
  workflow_dispatch:
2 changes: 0 additions & 2 deletions .github/workflows/integration_tests_inference_models.yml
@@ -1,8 +1,6 @@
name: INTEGRATION TESTS - inference models

on:
-  pull_request:
-    branches: [main]
  push:
    branches: [main]
  workflow_dispatch:
2 changes: 1 addition & 1 deletion .github/workflows/integration_tests_workflows_x86.yml
@@ -44,6 +44,6 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install --upgrade setuptools
-pip install --extra-index-url https://download.pytorch.org/whl/cpu -r requirements/_requirements.txt -r requirements/requirements.cpu.txt -r requirements/requirements.sdk.http.txt -r requirements/requirements.test.unit.txt -r requirements/requirements.http.txt -r requirements/requirements.yolo_world.txt -r requirements/requirements.doctr.txt -r requirements/requirements.sam.txt -r requirements/requirements.transformers.txt
+pip install --extra-index-url https://download.pytorch.org/whl/cpu -r requirements/_requirements.txt -r requirements/requirements.sam.txt -r requirements/requirements.cpu.txt -r requirements/requirements.http.txt -r requirements/requirements.test.unit.txt -r requirements/requirements.doctr.txt -r requirements/requirements.yolo_world.txt -r requirements/requirements.transformers.txt -r requirements/requirements.sdk.http.txt
- name: 🧪 Integration Tests of Workflows
run: ROBOFLOW_API_KEY=${{ secrets.API_KEY }} SKIP_FLORENCE2_TEST=FALSE LOAD_ENTERPRISE_BLOCKS=TRUE python -m pytest tests/workflows/integration_tests
4 changes: 2 additions & 2 deletions inference/core/env.py
@@ -73,10 +73,10 @@
CLIP_MODEL_ID = f"clip/{CLIP_VERSION_ID}"

# Gaze version ID, default is "L2CS"
GAZE_VERSION_ID = os.getenv("GAZE_VERSION_ID", "L2CS")
GAZE_VERSION_ID = os.getenv("GAZE_VERSION_ID", "l2cs")

# Gaze model ID
GAZE_MODEL_ID = f"gaze/{CLIP_VERSION_ID}"
GAZE_MODEL_ID = f"gaze/{GAZE_VERSION_ID}"

# OWLv2 version ID, default is "owlv2-large-patch14-ensemble"
OWLV2_VERSION_ID = os.getenv("OWLV2_VERSION_ID", "owlv2-large-patch14-ensemble")
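The second hunk above fixes GAZE_MODEL_ID, which previously interpolated CLIP_VERSION_ID instead of GAZE_VERSION_ID. A minimal sketch of the effect, assuming the usual "ViT-B-16" default for CLIP_VERSION_ID (an assumption, not shown in this diff):

import os

# Hypothetical illustration of the env.py fix; the CLIP default below is an assumption.
CLIP_VERSION_ID = os.getenv("CLIP_VERSION_ID", "ViT-B-16")
GAZE_VERSION_ID = os.getenv("GAZE_VERSION_ID", "l2cs")

old_gaze_model_id = f"gaze/{CLIP_VERSION_ID}"  # before the fix: "gaze/ViT-B-16"
new_gaze_model_id = f"gaze/{GAZE_VERSION_ID}"  # after the fix: "gaze/l2cs"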
2 changes: 1 addition & 1 deletion inference/core/version.py
@@ -1,4 +1,4 @@
__version__ = "0.31.2rc1"
__version__ = "0.32.0"


if __name__ == "__main__":
2 changes: 2 additions & 0 deletions inference/core/workflows/core_steps/common/utils.py
@@ -11,6 +11,7 @@
from inference.core.entities.requests.clip import ClipCompareRequest
from inference.core.entities.requests.cogvlm import CogVLMInferenceRequest
from inference.core.entities.requests.doctr import DoctrOCRInferenceRequest
+from inference.core.entities.requests.gaze import GazeDetectionInferenceRequest
from inference.core.entities.requests.sam2 import Sam2InferenceRequest
from inference.core.entities.requests.yolo_world import YOLOWorldInferenceRequest
from inference.core.managers.base import ModelManager
@@ -58,6 +59,7 @@ def load_core_model(
        CogVLMInferenceRequest,
        YOLOWorldInferenceRequest,
        Sam2InferenceRequest,
+       GazeDetectionInferenceRequest,
    ],
    core_model: str,
) -> str:
2 changes: 2 additions & 0 deletions inference/core/workflows/core_steps/loader.py
@@ -168,6 +168,7 @@
from inference.core.workflows.core_steps.models.foundation.florence2.v2 import (
Florence2BlockV2,
)
+from inference.core.workflows.core_steps.models.foundation.gaze.v1 import GazeBlockV1
from inference.core.workflows.core_steps.models.foundation.google_gemini.v1 import (
GoogleGeminiBlockV1,
)
@@ -598,6 +599,7 @@ def load_blocks() -> List[Type[WorkflowBlock]]:
        EnvironmentSecretsStoreBlockV1,
        SlackNotificationBlockV1,
        TwilioSMSNotificationBlockV1,
+       GazeBlockV1,
    ]


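As a quick sanity check of the registration above (a sketch, not part of the change), the new block should now be returned by load_blocks():

from inference.core.workflows.core_steps.loader import load_blocks
from inference.core.workflows.core_steps.models.foundation.gaze.v1 import GazeBlockV1

# GazeBlockV1 is appended to the list returned by load_blocks(), so the
# Workflows execution engine can discover the "roboflow_core/gaze@v1" step.
assert GazeBlockV1 in load_blocks()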
Empty file.
247 changes: 247 additions & 0 deletions inference/core/workflows/core_steps/models/foundation/gaze/v1.py
@@ -0,0 +1,247 @@
from typing import List, Literal, Optional, Tuple, Type, Union

import numpy as np
import supervision as sv
from pydantic import ConfigDict, Field

from inference.core.entities.requests.gaze import GazeDetectionInferenceRequest
from inference.core.managers.base import ModelManager
from inference.core.workflows.core_steps.common.entities import StepExecutionMode
from inference.core.workflows.core_steps.common.utils import (
    add_inference_keypoints_to_sv_detections,
    attach_parents_coordinates_to_batch_of_sv_detections,
    attach_prediction_type_info_to_sv_detections_batch,
    convert_inference_detections_batch_to_sv_detections,
    load_core_model,
)
from inference.core.workflows.execution_engine.entities.base import (
    Batch,
    OutputDefinition,
    WorkflowImageData,
)
from inference.core.workflows.execution_engine.entities.types import (
    BOOLEAN_KIND,
    FLOAT_KIND,
    IMAGE_KIND,
    KEYPOINT_DETECTION_PREDICTION_KIND,
    ImageInputField,
    Selector,
)
from inference.core.workflows.prototypes.block import (
    BlockResult,
    WorkflowBlock,
    WorkflowBlockManifest,
)

LONG_DESCRIPTION = """
Run the L2CS gaze detection model on faces in images.

This block can:

1. Detect faces in images and estimate their gaze direction
2. Estimate gaze direction on pre-cropped face images

The gaze direction is represented by yaw and pitch angles in degrees.
"""


def convert_gaze_detections_to_sv_detections_and_angles(
    images: Batch[WorkflowImageData],
    gaze_predictions: List[dict],
) -> Tuple[List[sv.Detections], List[List[float]], List[List[float]]]:
    """Convert gaze detection results to supervision detections and angle lists."""
    face_predictions = []
    yaw_degrees = []
    pitch_degrees = []

    for single_image, predictions in zip(images, gaze_predictions):
        height, width = single_image.numpy_image.shape[:2]

        # Format predictions for this image
        image_face_preds = {
            "predictions": [],
            "image": {"width": width, "height": height},
        }
        batch_yaw = []
        batch_pitch = []

        for p in predictions:  # predictions is already a list
            p_dict = p.model_dump(by_alias=True, exclude_none=True)
            for pred in p_dict["predictions"]:
                face = pred["face"]

                # Face detection with landmarks
                face_pred = {
                    "x": face["x"],
                    "y": face["y"],
                    "width": face["width"],
                    "height": face["height"],
                    "confidence": face["confidence"],
                    "class": "face",
                    "class_id": 0,
                    "keypoints": [
                        {
                            "x": l["x"],
                            "y": l["y"],
                            "confidence": face["confidence"],
                            "class_name": str(i),
                            "class_id": i,
                        }
                        for i, l in enumerate(face["landmarks"])
                    ],
                }

                image_face_preds["predictions"].append(face_pred)

                # Store angles in degrees
                batch_yaw.append(pred["yaw"] * 180 / np.pi)
                batch_pitch.append(pred["pitch"] * 180 / np.pi)

        face_predictions.append(image_face_preds)
        yaw_degrees.append(batch_yaw)
        pitch_degrees.append(batch_pitch)

    # Process predictions
    face_preds = convert_inference_detections_batch_to_sv_detections(face_predictions)

    # Add keypoints to supervision detections
    for prediction, detections in zip(face_predictions, face_preds):
        add_inference_keypoints_to_sv_detections(
            inference_prediction=prediction["predictions"],
            detections=detections,
        )

    face_preds = attach_prediction_type_info_to_sv_detections_batch(
        predictions=face_preds,
        prediction_type="facial-landmark",
    )
    face_preds = attach_parents_coordinates_to_batch_of_sv_detections(
        images=images,
        predictions=face_preds,
    )

    return face_preds, yaw_degrees, pitch_degrees


class BlockManifest(WorkflowBlockManifest):
    model_config = ConfigDict(
        json_schema_extra={
            "name": "Gaze Detection",
            "version": "v1",
            "short_description": "Detect faces and estimate gaze direction",
            "long_description": LONG_DESCRIPTION,
            "license": "Apache-2.0",
            "block_type": "model",
            "search_keywords": ["gaze", "face"],
        },
        protected_namespaces=(),
    )

    type: Literal["roboflow_core/gaze@v1"]
    images: Selector(kind=[IMAGE_KIND]) = ImageInputField
    do_run_face_detection: Union[bool, Selector(kind=[BOOLEAN_KIND])] = Field(
        default=True,
        description="Whether to run face detection. Set to False if input images are pre-cropped face images.",
    )

    @classmethod
    def get_parameters_accepting_batches(cls) -> List[str]:
        return ["images"]

    @classmethod
    def describe_outputs(cls) -> List[OutputDefinition]:
        return [
            OutputDefinition(
                name="face_predictions",
                kind=[KEYPOINT_DETECTION_PREDICTION_KIND],
                description="Facial landmark predictions",
            ),
            OutputDefinition(
                name="yaw_degrees",
                kind=[FLOAT_KIND],
                description="Yaw angle in degrees (-180 to 180, negative is left)",
            ),
            OutputDefinition(
                name="pitch_degrees",
                kind=[FLOAT_KIND],
                description="Pitch angle in degrees (-90 to 90, negative is down)",
            ),
        ]

    @classmethod
    def get_execution_engine_compatibility(cls) -> Optional[str]:
        return ">=1.3.0,<2.0.0"


class GazeBlockV1(WorkflowBlock):
    def __init__(
        self,
        model_manager: ModelManager,
        api_key: Optional[str],
        step_execution_mode: StepExecutionMode,
    ):
        self._model_manager = model_manager
        self._api_key = api_key
        self._step_execution_mode = step_execution_mode

    @classmethod
    def get_init_parameters(cls) -> List[str]:
        return ["model_manager", "api_key", "step_execution_mode"]

    @classmethod
    def get_manifest(cls) -> Type[WorkflowBlockManifest]:
        return BlockManifest

    def run(
        self,
        images: Batch[WorkflowImageData],
        do_run_face_detection: bool,
    ) -> BlockResult:
        if self._step_execution_mode is StepExecutionMode.LOCAL:
            return self.run_locally(
                images=images,
                do_run_face_detection=do_run_face_detection,
            )
        else:
            raise ValueError(
                f"Unsupported step execution mode: {self._step_execution_mode}"
            )

    def run_locally(
        self,
        images: Batch[WorkflowImageData],
        do_run_face_detection: bool,
    ) -> BlockResult:
        predictions = []

        for single_image in images:
            inference_request = GazeDetectionInferenceRequest(
                image=single_image.to_inference_format(numpy_preferred=True),
                do_run_face_detection=do_run_face_detection,
                api_key=self._api_key,
            )
            gaze_model_id = load_core_model(
                model_manager=self._model_manager,
                inference_request=inference_request,
                core_model="gaze",
            )
            prediction = self._model_manager.infer_from_request_sync(
                gaze_model_id, inference_request
            )
            predictions.append(prediction)

        # Convert predictions to supervision format and get angles
        face_preds, yaw_degrees, pitch_degrees = (
            convert_gaze_detections_to_sv_detections_and_angles(
                images=images,
                gaze_predictions=predictions,
            )
        )

        return [
            {
                "face_predictions": face_pred,
                "yaw_degrees": yaw,
                "pitch_degrees": pitch,
            }
            for face_pred, yaw, pitch in zip(face_preds, yaw_degrees, pitch_degrees)
        ]
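For context, a minimal workflow specification wiring in the new block could look like the sketch below. Only the step entry (type "roboflow_core/gaze@v1", the images selector, do_run_face_detection) and the output names come from the manifest above; the wrapper fields (version, inputs, outputs) follow the usual Workflows specification layout and are assumptions rather than part of this PR:

# Hedged sketch of a workflow definition using the gaze block.
GAZE_WORKFLOW = {
    "version": "1.0",
    "inputs": [{"type": "WorkflowImage", "name": "image"}],
    "steps": [
        {
            "type": "roboflow_core/gaze@v1",
            "name": "gaze",
            "images": "$inputs.image",
            "do_run_face_detection": True,
        }
    ],
    "outputs": [
        {"type": "JsonField", "name": "face_predictions", "selector": "$steps.gaze.face_predictions"},
        {"type": "JsonField", "name": "yaw_degrees", "selector": "$steps.gaze.yaw_degrees"},
        {"type": "JsonField", "name": "pitch_degrees", "selector": "$steps.gaze.pitch_degrees"},
    ],
}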