Merge branch 'develop'

ShipBit · Sep 20, 2024 · c50570d · c50570d
2 parents aa0f689 + f66b2a9
commit c50570d
Show file tree

Hide file tree

Showing 526 changed files with 82,402 additions and 289 deletions.
diff --git a/README.md b/README.md
diff --git a/api/commands.py b/api/commands.py
@@ -26,11 +26,12 @@ class SaveSecretCommand(WebSocketCommandModel):
 
 class RecordKeyboardActionsCommand(WebSocketCommandModel):
     command: Literal["record_keyboard_actions"] = "record_keyboard_actions"
-    recording_type: KeyboardRecordingType = KeyboardRecordingType.SINGLE
+    recording_type: KeyboardRecordingType
 
 
 class StopRecordingCommand(WebSocketCommandModel):
     command: Literal["stop_recording"] = "stop_recording"
+    recording_type: KeyboardRecordingType
 
 
 # SENT TO CLIENT

diff --git a/api/enums.py b/api/enums.py
@@ -50,6 +50,7 @@ class CustomPropertyType(Enum):
     SINGLE_SELECT = "single_select"
     VOICE_SELECTION = "voice_selection"
     SLIDER = "slider"
+    AUDIO_FILES = "audio_files"
 
 
 class AzureApiVersion(Enum):
@@ -68,13 +69,6 @@ class TtsVoiceGender(Enum):
     FEMALE = "Female"
 
 
-class OpenAiModel(Enum):
-    """https://platform.openai.com/docs/models/overview"""
-
-    GPT_4O = "gpt-4o"
-    GPT_4O_MINI = "gpt-4o-mini"
-
-
 class MistralModel(Enum):
     """https://docs.mistral.ai/getting-started/models/"""
 
@@ -85,6 +79,16 @@ class MistralModel(Enum):
     MISTRAL_MEDIUM = "mistral-medium-latest"
     MISTRAL_LARGE = "mistral-large-latest"
 
+class PerplexityModel(Enum):
+    """https://docs.perplexity.ai/guides/model-cards"""
+
+    SONAR_SMALL = "llama-3.1-sonar-small-128k-online"
+    SONAR_MEDIUM = "llama-3.1-sonar-large-128k-online"  # NOTE(review): named MEDIUM, id is "large"
+    SONAR_LARGE = "llama-3.1-sonar-huge-128k-online"  # NOTE(review): named LARGE, id is "huge"
+    CHAT_SMALL = "llama-3.1-sonar-small-128k-chat"
+    CHAT_LARGE = "llama-3.1-sonar-large-128k-chat"  # here LARGE does map to the "large" id
+    LLAMA3_8B = "llama-3.1-8b-instruct"
+    LLAMA3_70B = "llama-3.1-70b-instruct"
 
 class GoogleAiModel(Enum):
     GEMINI_1_5_FLASH = "gemini-1.5-flash"
@@ -151,6 +155,8 @@ class ConversationProvider(Enum):
     AZURE = "azure"
     WINGMAN_PRO = "wingman_pro"
     GOOGLE = "google"
+    CEREBRAS = "cerebras"
+    PERPLEXITY = "perplexity"
 
 
 class ImageGenerationProvider(Enum):
@@ -184,6 +190,7 @@ class SkillCategory(Enum):
     STAR_CITIZEN = "star_citizen"
     TRUCK_SIMULATOR = "truck_simulator"
     NO_MANS_SKY = "no_mans_sky"
+    FLIGHT_SIMULATOR = "flight_simulator"
 
 
 # Pydantic models for enums
@@ -231,13 +238,11 @@ class TtsVoiceGenderEnumModel(BaseEnumModel):
     gender: TtsVoiceGender
 
 
-class OpenAiModelEnumModel(BaseEnumModel):
-    model: OpenAiModel
-
-
 class MistralModelEnumModel(BaseEnumModel):
     model: MistralModel
 
+class PerplexityModelEnumModel(BaseEnumModel):
+    model: PerplexityModel
 
 class GoogleAiModelEnumModel(BaseEnumModel):
     model: GoogleAiModel
@@ -309,7 +314,6 @@ class SkillCategoryModel(BaseEnumModel):
     "AzureApiVersion": AzureApiVersionEnumModel,
     "AzureRegion": AzureRegionEnumModel,
     "TtsVoiceGender": TtsVoiceGenderEnumModel,
-    "OpenAiModel": OpenAiModelEnumModel,
     "MistralModel": MistralModelEnumModel,
     "GoogleAiModel": GoogleAiModelEnumModel,
     "WingmanProAzureDeployment": WingmanProAzureDeploymentEnumModel,
@@ -324,6 +328,7 @@ class SkillCategoryModel(BaseEnumModel):
     "WingmanProSttProvider": WingmanProSttProviderModel,
     "WingmanProTtsProvider": WingmanProTtsProviderModel,
     "SkillCategory": SkillCategoryModel,
+    "PerplexityModel": PerplexityModelEnumModel,
     # Add new enums here as key-value pairs
 }
 

diff --git a/api/interface.py b/api/interface.py
@@ -11,7 +11,6 @@
     CustomPropertyType,
     SkillCategory,
     TtsVoiceGender,
-    OpenAiModel,
     OpenAiTtsVoice,
     SoundEffect,
     SttProvider,
@@ -22,6 +21,7 @@
     WingmanProRegion,
     WingmanProSttProvider,
     WingmanProTtsProvider,
+    PerplexityModel,
 )
 
 
@@ -278,7 +278,7 @@ class XVASynthTtsConfig(BaseModel):
 
 
 class OpenAiConfig(BaseModel):
-    conversation_model: OpenAiModel
+    conversation_model: str
     """ The model to use for conversations aka "chit-chat" and for function calls.
     """
 
@@ -311,11 +311,21 @@ class MistralConfig(BaseModel):
     endpoint: str
 
 
+class PerplexityConfig(BaseModel):
+    conversation_model: PerplexityModel
+    endpoint: str
+
+
 class GroqConfig(BaseModel):
     conversation_model: str
     endpoint: str
 
 
+class CerebrasConfig(BaseModel):
+    conversation_model: str
+    endpoint: str
+
+
 class GoogleConfig(BaseModel):
     conversation_model: GoogleAiModel
 
@@ -391,6 +401,25 @@ class FeaturesConfig(BaseModel):
     use_generic_instant_responses: bool
 
 
+class AudioFile(BaseModel):
+    path: str
+    """The audio file to play. Required."""
+
+    name: str
+    """The name of the audio file."""
+
+
+class AudioFileConfig(BaseModel):
+    files: list[AudioFile]
+    """The audio file(s) to play. If there are multiple, a random file will be played."""
+
+    volume: float
+    """The volume to play the audio file at."""
+
+    wait: bool
+    """Whether to wait for the audio file to finish playing before continuing."""
+
+
 class CommandKeyboardConfig(BaseModel):
     hotkey: str
     """The hotkey. Can be a single key like 'a' or a combination like 'ctrl+shift+a'."""
@@ -441,6 +470,9 @@ class CommandActionConfig(BaseModel):
     write: Optional[str] = None
     """The word or phrase to type, for example, to type text in a login screen.  Must have associated button press to work.  May need special formatting for special characters."""
 
+    audio: Optional[AudioFileConfig] = None
+    """The audio file to play. Optional."""
+
 
 class CommandConfig(BaseModel):
     name: str
@@ -507,7 +539,15 @@ class CustomProperty(BaseModel):
     """The name of the property. Has to be unique"""
     name: str
     """The "friendly" name of the property, displayed in the UI."""
-    value: str | int | float | bool | VoiceSelection | list[VoiceSelection]
+    value: (
+        str
+        | int
+        | float
+        | bool
+        | VoiceSelection
+        | list[VoiceSelection]
+        | AudioFileConfig
+    )
     """The value of the property"""
     property_type: CustomPropertyType
     """Determines the type of the property and which controls to render in the UI."""
@@ -553,6 +593,7 @@ class NestedConfig(BaseModel):
     openai: OpenAiConfig
     mistral: MistralConfig
     groq: GroqConfig
+    cerebras: CerebrasConfig
     google: GoogleConfig
     openrouter: OpenRouterConfig
     local_llm: LocalLlmConfig
@@ -562,6 +603,7 @@ class NestedConfig(BaseModel):
     xvasynth: XVASynthTtsConfig
     whispercpp: WhispercppSttConfig
     wingman_pro: WingmanProConfig
+    perplexity: PerplexityConfig
     commands: Optional[list[CommandConfig]] = None
     skills: Optional[list[SkillConfig]] = None
 

diff --git a/assets/wingman-ui-1.png b/assets/wingman-ui-1.png
diff --git a/assets/wingman-ui-2.png b/assets/wingman-ui-2.png
diff --git a/assets/wingman-ui-3.png b/assets/wingman-ui-3.png
diff --git a/assets/wingman-ui-4.png b/assets/wingman-ui-4.png
diff --git a/assets/wingman-ui-5.png b/assets/wingman-ui-5.png
diff --git a/assets/wingman-ui-6.png b/assets/wingman-ui-6.png
diff --git a/assets/wingman-ui-7.png b/assets/wingman-ui-7.png
diff --git a/assets/wingman-ui-8.png b/assets/wingman-ui-8.png
diff --git a/main.py b/main.py
@@ -131,6 +131,29 @@ def custom_openapi():
     # Ensure the components.schemas key exists
     openapi_schema.setdefault("components", {}).setdefault("schemas", {})
 
+    # Add enums to schema
+    for enum_name, enum_model in ENUM_TYPES.items():
+        enum_field_name, enum_type = next(iter(enum_model.__annotations__.items()))
+        if issubclass(enum_type, Enum):
+            enum_values = [e.value for e in enum_type]
+            enum_schema = {
+                "type": "string",
+                "enum": enum_values,
+                "description": f"Possible values for {enum_name}",
+            }
+            openapi_schema["components"]["schemas"][enum_name] = enum_schema
+
+    openapi_schema["components"]["schemas"]["CommandActionConfig"] = {
+        "type": "object",
+        "properties": {
+            "keyboard": {"$ref": "#/components/schemas/CommandKeyboardConfig"},
+            "wait": {"type": "number"},
+            "mouse": {"$ref": "#/components/schemas/CommandMouseConfig"},
+            "write": {"type": "string"},
+            "audio": {"$ref": "#/components/schemas/AudioFileConfig"},
+        },
+    }
+
     # Add WebSocket command models to schema
     for cls in WebSocketCommandModel.__subclasses__():
         cls_schema_dict = cls.model_json_schema(
@@ -156,18 +179,6 @@ def custom_openapi():
                 cls_schema_dict.setdefault("required", []).append(field_name)
         openapi_schema["components"]["schemas"][cls.__name__] = cls_schema_dict
 
-    # Add enums to schema
-    for enum_name, enum_model in ENUM_TYPES.items():
-        enum_field_name, enum_type = next(iter(enum_model.__annotations__.items()))
-        if issubclass(enum_type, Enum):
-            enum_values = [e.value for e in enum_type]
-            enum_schema = {
-                "type": "string",
-                "enum": enum_values,
-                "description": f"Possible values for {enum_name}",
-            }
-            openapi_schema["components"]["schemas"][enum_name] = enum_schema
-
     app.openapi_schema = openapi_schema
     return app.openapi_schema
 
@@ -244,7 +255,7 @@ async def async_main(host: str, port: int, sidecar: bool):
             secret = input(f"Please enter your '{error.secret_name}' API key/secret: ")
             if secret:
                 secret_keeper.secrets[error.secret_name] = secret
-                secret_keeper.save()
+                await secret_keeper.save()
                 saved_secrets.append(error.secret_name)
             else:
                 return

diff --git a/providers/elevenlabs.py b/providers/elevenlabs.py
@@ -1,8 +1,6 @@
-from elevenlabslib import (
-    User,
-    GenerationOptions,
-    PlaybackOptions,
-)
+import asyncio
+from typing import Optional
+from elevenlabslib import User, GenerationOptions, PlaybackOptions, SFXGenerationOptions
 from api.enums import SoundEffect, WingmanInitializationErrorType
 from api.interface import ElevenlabsConfig, SoundConfig, WingmanInitializationError
 from services.audio_player import AudioPlayer
@@ -55,26 +53,26 @@ def notify_playback_finished():
 
             contains_high_end_radio = SoundEffect.HIGH_END_RADIO in sound_config.effects
             if contains_high_end_radio:
-                audio_player.play_wav("Radio_Static_Beep.wav", sound_config.volume)
+                audio_player.play_wav_sample("Radio_Static_Beep.wav", sound_config.volume)
 
             if sound_config.play_beep:
-                audio_player.play_wav("beep.wav", sound_config.volume)
+                audio_player.play_wav_sample("beep.wav", sound_config.volume)
             elif sound_config.play_beep_apollo:
-                audio_player.play_wav("Apollo_Beep.wav", sound_config.volume)
+                audio_player.play_wav_sample("Apollo_Beep.wav", sound_config.volume)
 
             WebSocketUser.ensure_async(
                 audio_player.notify_playback_finished(wingman_name)
             )
 
         def notify_playback_started():
             if sound_config.play_beep:
-                audio_player.play_wav("beep.wav", sound_config.volume)
+                audio_player.play_wav_sample("beep.wav", sound_config.volume)
             elif sound_config.play_beep_apollo:
-                audio_player.play_wav("Apollo_Beep.wav", sound_config.volume)
+                audio_player.play_wav_sample("Apollo_Beep.wav", sound_config.volume)
 
             contains_high_end_radio = SoundEffect.HIGH_END_RADIO in sound_config.effects
             if contains_high_end_radio:
-                audio_player.play_wav("Radio_Static_Beep.wav", sound_config.volume)
+                audio_player.play_wav_sample("Radio_Static_Beep.wav", sound_config.volume)
 
             WebSocketUser.ensure_async(
                 audio_player.notify_playback_started(wingman_name)
@@ -142,10 +140,40 @@ def playback_finished(wingman_name):
 
             audio_player.playback_events.subscribe("finished", playback_finished)
 
+    async def generate_sound_effect(
+        self,
+        prompt: str,
+        duration_seconds: Optional[float] = None,
+        prompt_influence: Optional[float] = None,
+    ) -> bytes:
+        """Generate a sound effect from a text prompt and return the audio bytes.
+
+        Args:
+            prompt: Text description of the sound effect to generate.
+            duration_seconds: Passed through to SFXGenerationOptions.
+            prompt_influence: Passed through to SFXGenerationOptions.
+
+        Raises:
+            Exception: Whatever the generation future raised is re-raised here.
+        """
+        user = User(self.api_key)
+        options = SFXGenerationOptions(
+            duration_seconds=duration_seconds, prompt_influence=prompt_influence
+        )
+        req, _ = user.generate_sfx(prompt, options)
+
+        # The future may complete on a worker thread, where asyncio.Event.set()
+        # is unsafe and errors were lost; wrap_future bridges both to the loop.
+        return await asyncio.wrap_future(req)
+
     def get_available_voices(self):
         user = User(self.api_key)
         return user.get_available_voices()
 
     def get_available_models(self):
         user = User(self.api_key)
         return user.get_models()
+
+    def get_subscription_data(self):
+        user = User(self.api_key)
+        return user.get_subscription_data()
diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,6 @@
 azure-cognitiveservices-speech==1.38.0
 edge-tts==6.1.12
-elevenlabslib==0.22.5
+elevenlabslib==0.22.6
 fastapi==0.111.0
 google-generativeai==0.7.0
 markdown==3.6