From 570371e6e25e7627d913019ae9711a9cb508a713 Mon Sep 17 00:00:00 2001
From: Ashish-Abraham
Date: Tue, 5 Nov 2024 11:14:44 +0530
Subject: [PATCH 01/10] added improved prompts for better consistent results

---
 examples/video_summarization/workflow.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/examples/video_summarization/workflow.py b/examples/video_summarization/workflow.py
index ac09c4ffb..7e3920ec6 100644
--- a/examples/video_summarization/workflow.py
+++ b/examples/video_summarization/workflow.py
@@ -97,7 +97,7 @@ def classify_meeting_intent(speech: Transcription) -> Transcription:
     )
     transcription_text = "\n".join([segment.text for segment in speech.segments])
     prompt = f"""
-    Classify the intent of the audio. Possible intents:
+    Analyze the audio transcript and classify the intent of the audio FROM THE FOLLOWING OPTIONS ONLY:
     - job-interview
     - sales-call
     - customer-support-call
@@ -105,10 +105,11 @@ def classify_meeting_intent(speech: Transcription) -> Transcription:
     - marketing-call
     - product-call
     - financial-call
-    Format: intent: <intent>
+    Required Output Format: intent: <intent>
 
     Transcription:
     {transcription_text}
+    DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT! Provide ONLY the intent in the required format.
     """
     output = model(prompt=prompt, max_tokens=50, stop=["\n"])
     response = output["choices"][0]["text"]
@@ -136,7 +137,7 @@ def summarize_job_interview(speech: Transcription) -> Summary:
     )
     transcription_text = "\n".join([segment.text for segment in speech.segments])
     prompt = f"""
-    Summarize the key points from this job interview transcript, including:
+    Analyze this job interview transcript and summarize the key points in the below format ONLY:
     1. Candidate's Strengths and Qualifications
     2. Key Responses and Insights
     3. Cultural Fit and Soft Skills
     4. Areas of Concern or Improvement
     5. Overall Impression and Recommendation
 
     Transcript:
-    {transcription_text}
+    {transcription_text[:1000]}
+    DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT!
     """
     output = model(prompt=prompt, max_tokens=30000, stop=["\n"])
     return Summary(summary=output["choices"][0]["text"])
@@ -161,15 +163,16 @@ def summarize_sales_call(speech: Transcription) -> Summary:
     )
     transcription_text = "\n".join([segment.text for segment in speech.segments])
     prompt = f"""
-    Summarize this sales call transcript, highlighting:
-    - Key details
-    - Client concerns
-    - Action items
-    - Next steps
-    - Recommendations for improving the approach
+    Analyze this sales call transcript and summarize in the below format ONLY:
+    1. Key details
+    2. Client concerns
+    3. Action items
+    4. Next steps
+    5. Recommendations for improving the approach
 
     Transcript:
     {transcription_text}
+    DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT!
     """
     output = model(prompt=prompt, max_tokens=30000, stop=["\n"])
     return Summary(summary=output["choices"][0]["text"])

From 2db61f23db0d976781a6e82392f3e2ae1858d432 Mon Sep 17 00:00:00 2001
From: Ashish-Abraham
Date: Tue, 5 Nov 2024 11:16:44 +0530
Subject: [PATCH 02/10] Revert "added improved prompts for better consistent
 results"

This reverts commit 570371e6e25e7627d913019ae9711a9cb508a713.
---
 examples/video_summarization/workflow.py | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/examples/video_summarization/workflow.py b/examples/video_summarization/workflow.py
index 7e3920ec6..ac09c4ffb 100644
--- a/examples/video_summarization/workflow.py
+++ b/examples/video_summarization/workflow.py
@@ -97,7 +97,7 @@ def classify_meeting_intent(speech: Transcription) -> Transcription:
     )
     transcription_text = "\n".join([segment.text for segment in speech.segments])
     prompt = f"""
-    Analyze the audio transcript and classify the intent of the audio FROM THE FOLLOWING OPTIONS ONLY:
+    Classify the intent of the audio. Possible intents:
     - job-interview
     - sales-call
     - customer-support-call
@@ -105,11 +105,10 @@ def classify_meeting_intent(speech: Transcription) -> Transcription:
     - marketing-call
     - product-call
     - financial-call
-    Required Output Format: intent: <intent>
+    Format: intent: <intent>
 
     Transcription:
     {transcription_text}
-    DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT! Provide ONLY the intent in the required format.
     """
     output = model(prompt=prompt, max_tokens=50, stop=["\n"])
     response = output["choices"][0]["text"]
@@ -137,7 +136,7 @@ def summarize_job_interview(speech: Transcription) -> Summary:
     )
     transcription_text = "\n".join([segment.text for segment in speech.segments])
     prompt = f"""
-    Analyze this job interview transcript and summarize the key points in the below format ONLY:
+    Summarize the key points from this job interview transcript, including:
     1. Candidate's Strengths and Qualifications
     2. Key Responses and Insights
     3. Cultural Fit and Soft Skills
     4. Areas of Concern or Improvement
     5. Overall Impression and Recommendation
 
     Transcript:
-    {transcription_text[:1000]}
-    DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT!
+    {transcription_text}
     """
     output = model(prompt=prompt, max_tokens=30000, stop=["\n"])
     return Summary(summary=output["choices"][0]["text"])
@@ -163,16 +161,15 @@ def summarize_sales_call(speech: Transcription) -> Summary:
     )
     transcription_text = "\n".join([segment.text for segment in speech.segments])
     prompt = f"""
-    Analyze this sales call transcript and summarize in the below format ONLY:
-    1. Key details
-    2. Client concerns
-    3. Action items
-    4. Next steps
-    5. Recommendations for improving the approach
+    Summarize this sales call transcript, highlighting:
+    - Key details
+    - Client concerns
+    - Action items
+    - Next steps
+    - Recommendations for improving the approach
 
     Transcript:
     {transcription_text}
-    DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT!
     """
     output = model(prompt=prompt, max_tokens=30000, stop=["\n"])
     return Summary(summary=output["choices"][0]["text"])

From 00fec5ab25e92fa01cd90e37ba34b131d28b14bd Mon Sep 17 00:00:00 2001
From: Ashish-Abraham
Date: Tue, 5 Nov 2024 11:18:09 +0530
Subject: [PATCH 03/10] added improved prompts that gave better consistent
 results in workflow

---
 examples/video_summarization/workflow.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/examples/video_summarization/workflow.py b/examples/video_summarization/workflow.py
index ac09c4ffb..7e3920ec6 100644
--- a/examples/video_summarization/workflow.py
+++ b/examples/video_summarization/workflow.py
@@ -97,7 +97,7 @@ def classify_meeting_intent(speech: Transcription) -> Transcription:
     )
     transcription_text = "\n".join([segment.text for segment in speech.segments])
     prompt = f"""
-    Classify the intent of the audio. Possible intents:
+    Analyze the audio transcript and classify the intent of the audio FROM THE FOLLOWING OPTIONS ONLY:
     - job-interview
     - sales-call
     - customer-support-call
@@ -105,10 +105,11 @@ def classify_meeting_intent(speech: Transcription) -> Transcription:
     - marketing-call
     - product-call
     - financial-call
-    Format: intent: <intent>
+    Required Output Format: intent: <intent>
 
     Transcription:
     {transcription_text}
+    DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT! Provide ONLY the intent in the required format.
     """
     output = model(prompt=prompt, max_tokens=50, stop=["\n"])
     response = output["choices"][0]["text"]
@@ -136,7 +137,7 @@ def summarize_job_interview(speech: Transcription) -> Summary:
     )
     transcription_text = "\n".join([segment.text for segment in speech.segments])
     prompt = f"""
-    Summarize the key points from this job interview transcript, including:
+    Analyze this job interview transcript and summarize the key points in the below format ONLY:
     1. Candidate's Strengths and Qualifications
     2. Key Responses and Insights
     3. Cultural Fit and Soft Skills
     4. Areas of Concern or Improvement
     5. Overall Impression and Recommendation
 
     Transcript:
-    {transcription_text}
+    {transcription_text[:1000]}
+    DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT!
     """
     output = model(prompt=prompt, max_tokens=30000, stop=["\n"])
     return Summary(summary=output["choices"][0]["text"])
@@ -161,15 +163,16 @@ def summarize_sales_call(speech: Transcription) -> Summary:
     )
     transcription_text = "\n".join([segment.text for segment in speech.segments])
     prompt = f"""
-    Summarize this sales call transcript, highlighting:
-    - Key details
-    - Client concerns
-    - Action items
-    - Next steps
-    - Recommendations for improving the approach
+    Analyze this sales call transcript and summarize in the below format ONLY:
+    1. Key details
+    2. Client concerns
+    3. Action items
+    4. Next steps
+    5. Recommendations for improving the approach
 
     Transcript:
     {transcription_text}
+    DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT!
     """
     output = model(prompt=prompt, max_tokens=30000, stop=["\n"])
     return Summary(summary=output["choices"][0]["text"])

From 7c63e386acbca9fdb21dd6b03116ab44e6f81258 Mon Sep 17 00:00:00 2001
From: Ashish-Abraham
Date: Tue, 5 Nov 2024 13:28:16 +0530
Subject: [PATCH 04/10] added improved prompts that gave better consistent
 results in workflow

---
 examples/video_summarization/workflow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/video_summarization/workflow.py b/examples/video_summarization/workflow.py
index 7e3920ec6..ecbab1fab 100644
--- a/examples/video_summarization/workflow.py
+++ b/examples/video_summarization/workflow.py
@@ -145,7 +145,7 @@ def summarize_job_interview(speech: Transcription) -> Summary:
     5. Overall Impression and Recommendation
 
     Transcript:
-    {transcription_text[:1000]}
+    {transcription_text}
     DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT!
     """
     output = model(prompt=prompt, max_tokens=30000, stop=["\n"])

From 96729b3137588c2d3ac907107df0f9cc4b037bf7 Mon Sep 17 00:00:00 2001
From: Ashish-Abraham
Date: Wed, 6 Nov 2024 21:56:32 +0530
Subject: [PATCH 05/10] added workflow

---
 examples/conversation_extraction/main.py | 366 +++++++++++++++++++++++
 1 file changed, 366 insertions(+)
 create mode 100644 examples/conversation_extraction/main.py

diff --git a/examples/conversation_extraction/main.py b/examples/conversation_extraction/main.py
new file mode 100644
index 000000000..8b6e28ec1
--- /dev/null
+++ b/examples/conversation_extraction/main.py
@@ -0,0 +1,366 @@
+from pydantic import BaseModel, Field
+from typing import List, Optional, Union
+import logging
+from indexify_extractor_sdk import Extractor, Content, Feature
+from indexify.functions_sdk.graph import Graph
+from indexify.functions_sdk.data_objects import File
+from indexify.functions_sdk.image import Image
+from indexify.functions_sdk.indexify_functions import indexify_function, indexify_router
+import json
+import io
+import re
+import datetime
+from indexify import RemoteGraph
+from dotenv import load_dotenv
+import os
+from llama_cpp import Llama
+
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+class BaseConversationSchema(BaseModel):
+    meeting_id: str
+    date: datetime.datetime
+    duration: int
+    participants: List[str]
+    meeting_type: str
+    summary: str
+
+    class Config:
+        arbitrary_types_allowed = True
+
+class SpeechSegment(BaseModel):
+    speaker: Optional[str] = None
+    text: str
+    start_ts: float
+    end_ts: float
+
+class SpeechClassification(BaseModel):
+    classification: str
+    confidence: float
+
+class Transcription(BaseModel):
+    segments: List[SpeechSegment]
+    classification: Optional[SpeechClassification] = None
+
+class StrategyMeetingSchema(BaseConversationSchema):
+    key_decisions: List[str]
+    risk_assessments: List[str]
+    strategic_initiatives: List[str]
+    action_items: List[str]
+
+class SalesCallSchema(BaseConversationSchema):
+    customer_pain_points: List[str]
+    proposed_solutions: List[str]
+    objections: List[str]
+    next_steps: List[str]
+
+class RDBrainstormSchema(BaseConversationSchema):
+    innovative_ideas: List[str]
+    technical_challenges: List[str]
+    resource_requirements: List[str]
+    potential_impacts: List[str]
+
+
+base_image = (
+    Image()
+    .name("tensorlake/base-image")
+)
+
+transcribe_image = Image().name("tensorlake/transcriber").run("pip install faster_whisper")
+
+llama_cpp_image = (
+    Image()
+    .name("tensorlake/llama-cpp")
+    .run("apt-get update && apt-get install -y build-essential libgomp1")
+    .run("pip install llama-cpp-python huggingface_hub")
+    .run("apt-get purge -y build-essential && apt-get autoremove -y && rm -rf /var/lib/apt/lists/*")
+)
+
+def get_chat_completion(prompt: str) -> str:
+    model = Llama.from_pretrained(
+        repo_id="NousResearch/Hermes-3-Llama-3.1-8B-GGUF",
+        filename="*Q8_0.gguf",
+        verbose=True,
+        n_ctx=60000,
+    )
+    output = model(prompt=prompt, max_tokens=30000, stop=["\n"])
+    return output["choices"][0]["text"]
+
+@indexify_function(image=transcribe_image)
+def transcribe_audio(file: File) -> Transcription:
+    """Transcribe audio and diarize speakers."""
+    from faster_whisper import WhisperModel
+    model = WhisperModel("base", device="cpu")
+    segments, _ = model.transcribe(io.BytesIO(file.data))
+    audio_segments = [SpeechSegment(text=segment.text, start_ts=segment.start, end_ts=segment.end) for segment in segments]
+    return Transcription(segments=audio_segments)
+
+@indexify_function(image=groq_image)
+def classify_meeting_intent(speech: Transcription) -> Transcription:
+    try:
+        """Classify the intent of the audio."""
+        transcription_text = "\n".join([segment.text for segment in speech.segments])
+        prompt = f"""
+        Analyze the audio transcript and classify the intent of the audio FROM THE FOLLOWING OPTIONS ONLY:
+        - strategy-meeting
+        - sales-call
+        - marketing-call
+        - product-call
+        - rd-brainstorm
+        Required Output Format: intent: <intent>
+
+        Transcription:
+        {transcription_text}
+        DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT! ONLY the Intent in the required format.
+        """
+        response = get_chat_completion(prompt)
+        intent = response.split(":")[-1].strip()
+        if intent in ["strategy-meeting", "sales-call", "marketing-call", "product-call", "rd-brainstorm"]:
+            speech.classification = SpeechClassification(classification=intent, confidence=0.8)
+        else:
+            speech.classification = SpeechClassification(classification="unknown", confidence=0.5)
+
+        return speech
+    except Exception as e:
+        logging.error(f"Error classifying meeting intent: {e}")
+        speech.classification = SpeechClassification(classification="unknown", confidence=0.5)
+        return speech
+
+@indexify_function(image=groq_image)
+def summarize_strategy_meeting(speech: Transcription) -> StrategyMeetingSchema:
+    """Summarize the strategy meeting."""
+    transcription_text = "\n".join([segment.text for segment in speech.segments])
+    prompt = f"""
+    Analyze this strategy meeting transcript and extract data in this format only:
+    1. Key decisions:\n
+    2. Risk assessments:\n
+    3. Strategic initiatives:\n
+    4. Action items:\n
+
+    Transcript:
+    {transcription_text}
+    DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT!
+    """
+    summary = get_chat_completion(prompt)
+    lines = summary.split('\n')
+    key_decisions = []
+    risk_assessments = []
+    strategic_initiatives = []
+    action_items = []
+    current_list = None
+    for line in lines:
+        line = line.strip()
+        if line.startswith('1. Key decisions:'):
+            current_list = key_decisions
+        elif line.startswith('2. Risk assessments:'):
+            current_list = risk_assessments
+        elif line.startswith('3. Strategic initiatives:'):
+            current_list = strategic_initiatives
+        elif line.startswith('4. Action items:'):
+            current_list = action_items
+        elif current_list is not None and line:
+            current_list.append(line.rstrip(', '))
+
+    # Ensure all required fields are populated
+    return StrategyMeetingSchema(
+        meeting_id=f"Strategy-{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}",
+        date=datetime.datetime.now(),
+        duration=sum((segment.end_ts - segment.start_ts) for segment in speech.segments),
+        participants=[segment.speaker for segment in speech.segments if segment.speaker],
+        meeting_type="Strategy Meeting",
+        summary=summary,
+        key_decisions=key_decisions,
+        risk_assessments=risk_assessments,
+        strategic_initiatives=strategic_initiatives,
+        action_items=action_items
+    )
+
+@indexify_function(image=groq_image)
+def summarize_sales_call(speech: Transcription) -> SalesCallSchema:
+    """Summarize the sales call according to SalesCallSchema."""
+    transcription_text = "\n".join([segment.text for segment in speech.segments])
+    prompt = f"""
+    Analyze this sales call transcript and extract data ONLY in this format only:
+    1. Key details
+    2. Client concerns
+    3. Action items
+    4. Next steps
+    5. Recommendations for improving the approach
+
+    Transcript:
+    {transcription_text}
+    DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT!
+    """
+    summary = get_chat_completion(prompt)
+    lines = summary.split('\n')
+    customer_pain_points = []
+    proposed_solutions = []
+    objections = []
+    next_steps = []
+    current_list = None
+    for line in lines:
+        line = line.strip()
+        if line.startswith('1. Customer Pain Points:'):
+            current_list = customer_pain_points
+        elif line.startswith('2. Proposed Solutions:'):
+            current_list = proposed_solutions
+        elif line.startswith('3. Objections:'):
+            current_list = objections
+        elif line.startswith('4. Next Steps:'):
+            current_list = next_steps
+        elif current_list is not None and line:
+            current_list.append(line.rstrip(', '))
+
+    # Ensure all required fields are populated
+    return SalesCallSchema(
+        meeting_id=f"RD-{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}",
+        date=datetime.datetime.now(),
+        duration=sum((segment.end_ts - segment.start_ts) for segment in speech.segments),
+        participants=[segment.speaker for segment in speech.segments if segment.speaker],
+        meeting_type="Sales Call",
+        summary=summary,
+        customer_pain_points=customer_pain_points,
+        proposed_solutions=proposed_solutions,
+        objections=objections,
+        next_steps=next_steps
+    )
+
+@indexify_function(image=groq_image)
+def summarize_rd_brainstorm(speech: Transcription) -> RDBrainstormSchema:
+    """Summarize the R&D brainstorming session according to RDBrainstormSchema."""
+    transcription_text = "\n".join([segment.text for segment in speech.segments])
+    prompt = f"""
+    Analyze this sales call transcript and extract data ONLY in this format only:
+    1. Innovative Ideas:
+    2. Technical Challenges:
+    3. Resource Requirements:
+    4. Potential Impacts:
+
+    Transcript:
+    {transcription_text}
+    DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT!
+    """
+    summary = get_chat_completion(prompt)
+    lines = summary.split('\n')
+    innovative_ideas = []
+    technical_challenges = []
+    resource_requirements = []
+    potential_impacts = []
+    current_list = None
+    for line in lines:
+        line = line.strip()
+        if line.startswith('1. Innovative Ideas:'):
+            current_list = innovative_ideas
+        elif line.startswith('2. Technical Challenges:'):
+            current_list = technical_challenges
+        elif line.startswith('3. Resource Requirements:'):
+            current_list = resource_requirements
+        elif line.startswith('4. Potential Impacts:'):
+            current_list = potential_impacts
+        elif current_list is not None and line:
+            current_list.append(line.rstrip(', '))
+
+    # Ensure all required fields are populated
+    return RDBrainstormSchema(
+        meeting_id=f"RD-{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}",
+        date=datetime.datetime.now(),
+        duration=sum((segment.end_ts - segment.start_ts) for segment in speech.segments),
+        participants=[segment.speaker for segment in speech.segments if segment.speaker],
+        meeting_type="R&D Brainstorm",
+        summary=summary,
+        innovative_ideas=innovative_ideas,
+        technical_challenges=technical_challenges,
+        resource_requirements=resource_requirements,
+        potential_impacts=potential_impacts
+    )
+
+@indexify_router(image=base_image)
+def router(speech: Transcription) -> List[Union[summarize_strategy_meeting, summarize_sales_call, summarize_rd_brainstorm]]:
+    """Route the transcription to the appropriate summarizer based on the classification."""
+    if speech.classification and speech.classification.classification == "strategy-meeting":
+        return [summarize_strategy_meeting]
+    elif speech.classification and speech.classification.classification in ["sales-call", "marketing-call", "product-call"]:
+        return [summarize_sales_call]
+    elif speech.classification and speech.classification.classification == "rd-brainstorm":
+        return [summarize_rd_brainstorm]
+    return []
+
+def create_graph():
+    g = Graph("Conversation_Extractor", start_node=transcribe_audio)
+    g.add_edge(transcribe_audio, classify_meeting_intent)
+    g.add_edge(classify_meeting_intent, router)
+    g.route(router, [summarize_strategy_meeting, summarize_sales_call, summarize_rd_brainstorm])
+    return g
+
+def deploy_graphs(server_url: str):
+    graph = create_graph()
+    RemoteGraph.deploy(graph, server_url=server_url)
+    logging.info("Graph deployed successfully")
+
+def run_workflow(mode: str, server_url: str = 'http://localhost:8950', bucket_name: str = None, file_key: str = None):
+    if mode == 'in-process-run':
+        graph = create_graph()
+    elif mode == 'remote-run':
+        graph = Graph.by_name("Conversation_Extractor", server_url=server_url)
+    else:
+        raise ValueError("Invalid mode. Choose 'in-process-run' or 'remote-run'.")
+
+    file_path = "data/audiofile" #replace
+    with open(file_path, 'rb') as audio_file:
+        audio_data = audio_file.read()
+    file = File(path=file_path, data=audio_data)
+
+    invocation_id = graph.run(block_until_done=True, file=file)
+
+    logging.info(f"Retrieving transcription for {invocation_id}")
+    transcription = graph.output(invocation_id=invocation_id, fn_name=transcribe_audio.name)[0]
+    for segment in transcription.segments:
+        logging.info(f"{round(segment.start_ts, 2)} - {round(segment.end_ts, 2)}: {segment.text}")
+
+    try:
+        classification = graph.output(invocation_id=invocation_id, fn_name=classify_meeting_intent.name)[0].classification
+        logging.info(f"Transcription Classification: {classification.classification}")
+
+        if classification.classification == "strategy-meeting":
+            summary = graph.output(invocation_id=invocation_id, fn_name=summarize_strategy_meeting.name)[0]
+        elif classification.classification in ["sales-call", "marketing-call", "product-call"]:
+            summary = graph.output(invocation_id=invocation_id, fn_name=summarize_sales_call.name)[0]
+        elif classification.classification == "rd-brainstorm":
+            summary = graph.output(invocation_id=invocation_id, fn_name=summarize_rd_brainstorm.name)[0]
+        else:
+            logging.warning(f"No suitable summarization found for the classification: {classification.classification}")
+            return
+
+        logging.info(f"\nExtracted information:\n{summary.summary}")
+    except Exception as e:
+        logging.error(f"Error in workflow execution: {str(e)}")
+        logging.error(f"Graph output for classify_meeting_intent: {graph.output(invocation_id=invocation_id, fn_name=classify_meeting_intent.name)}")
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description="Run Conversation Extractor")
+    parser.add_argument('--mode', choices=['in-process-run', 'remote-deploy', 'remote-run'], required=True,
+                        help='Mode of operation: in-process-run, remote-deploy, or remote-run')
+    parser.add_argument('--server-url', default='http://localhost:8900', help='Indexify server URL for remote mode or deployment')
+    args = parser.parse_args()
+    try:
+        if args.mode == 'remote-deploy':
+            deploy_graphs(args.server_url)
+        elif args.mode in ['in-process-run', 'remote-run']:
+            run_workflow(args.mode, args.server_url)
+        logging.info("Operation completed successfully!")
+    except Exception as e:
+        logging.error(f"An error occurred during execution: {str(e)}")
+
+
+
+
+
+
+
+
+
+
+

From 3bc15b9e969d79161af135ee893ff67d36d0c8c5 Mon Sep 17 00:00:00 2001
From: Ashish-Abraham
Date: Wed, 6 Nov 2024 21:59:58 +0530
Subject: [PATCH 06/10] added docker-compose.yml

---
 .../docker-compose.yml | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 examples/conversation_extraction/docker-compose.yml

diff --git a/examples/conversation_extraction/docker-compose.yml b/examples/conversation_extraction/docker-compose.yml
new file mode 100644
index 000000000..6ffe40e3c
--- /dev/null
+++ b/examples/conversation_extraction/docker-compose.yml
@@ -0,0 +1,48 @@
+networks:
+  server:
+services:
+  indexify:
+    image: tensorlake/indexify-server
+    ports:
+      - 8900:8900
+    networks:
+      server:
+        aliases:
+          - indexify-server
+    volumes:
+      - data:/app
+
+  audio-processor:
+    image: tensorlake/audio-processor:latest
+    command: ["indexify-cli", "executor", "--server-addr", "indexify:8900"]
+    networks:
+      server:
+    volumes:
+      - data:/app
+
+  transcriber:
+    image: tensorlake/transcriber:latest
+    command: ["indexify-cli", "executor", "--server-addr", "indexify:8900"]
+    networks:
+      server:
+    volumes:
+      - data:/app
+
+  router:
+    image: tensorlake/router:latest
+    command: ["indexify-cli", "executor", "--server-addr", "indexify:8900"]
+    networks:
+      server:
+    volumes:
+      - data:/app
+
+  llama-cpp:
+    image: tensorlake/llama-cpp:latest
+    command: ["indexify-cli", "executor", "--server-addr", "indexify:8900"]
+    networks:
+      server:
+    volumes:
+      - data:/app
+
+volumes:
+  data:
\ No newline at end of file

From e91410b13bd81d8eceadc08c8bca4833970abbf7 Mon Sep 17 00:00:00 2001
From: Ashish-Abraham
Date: Wed, 6 Nov 2024 22:03:52 +0530
Subject: [PATCH 07/10] added requirements

---
 examples/conversation_extraction/requirements.txt | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 examples/conversation_extraction/requirements.txt

diff --git a/examples/conversation_extraction/requirements.txt b/examples/conversation_extraction/requirements.txt
new file mode 100644
index 000000000..52bceb5f4
--- /dev/null
+++ b/examples/conversation_extraction/requirements.txt
@@ -0,0 +1,4 @@
+indexify
+faster_whisper
+llama_cpp_python
+rich
\ No newline at end of file

From 48ff9dac4a0bbd9fce94a5d45e58029072f51f5f Mon Sep 17 00:00:00 2001
From: Ashish-Abraham
Date: Wed, 6 Nov 2024 22:42:15 +0530
Subject: [PATCH 08/10] added readme

---
 examples/conversation_extraction/README.md | 92 ++++++++++++++++++++++
 examples/conversation_extraction/main.py   |  2 +-
 2 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 examples/conversation_extraction/README.md

diff --git a/examples/conversation_extraction/README.md b/examples/conversation_extraction/README.md
new file mode 100644
index 000000000..d53188841
--- /dev/null
+++ b/examples/conversation_extraction/README.md
@@ -0,0 +1,92 @@
+# Meeting Conversation Extractor with Indexify
+
+This project demonstrates how to build a meeting conversation extraction pipeline using Indexify. The pipeline processes audio files, transcribes them, classifies the content, and generates structured summaries based on the meeting type.
+
+## Features
+
+- Speech-to-text transcription using Faster Whisper
+- Meeting type classification using Llama.cpp
+- Structured summaries for different meeting types:
+  - Strategy meetings
+  - Sales/Marketing/Product calls
+  - R&D brainstorming sessions
+
+## Prerequisites
+
+- Python 3.9+
+- Docker and Docker Compose (for containerized setup)
+
+## Installation and Usage
+
+### Option 1: Local Installation - In Process
+
+1. Clone this repository:
+   ```
+   git clone https://github.com/tensorlakeai/indexify
+   cd indexify/examples/conversation_extraction
+   ```
+
+2. Create a virtual environment and activate it:
+   ```
+   python -m venv venv
+   source venv/bin/activate
+   ```
+
+3. Install the required dependencies:
+   ```
+   pip install -r requirements.txt
+   ```
+
+4. Run the main script:
+   ```
+   python main.py --mode in-process-run
+   ```
+
+### Option 2: Using Docker Compose - Deployed Graph
+
+1. Clone this repository:
+   ```
+   git clone https://github.com/tensorlakeai/indexify
+   cd indexify/examples/conversation_extraction
+   ```
+
+2. Ensure Docker and Docker Compose are installed on your system.
+
+3. Build the Docker images for each function in the pipeline.
+
+4. Start the services:
+   ```
+   docker-compose up --build
+   ```
+
+5. Deploy the graph:
+   ```
+   python main.py --mode remote-deploy
+   ```
+
+6. Run the workflow:
+   ```
+   python main.py --mode remote-run
+   ```
+
+## How it Works
+
+1. **Audio Processing:**
+   - Transcription: Converts speech to text using Faster Whisper, including speaker diarization
+   - Meeting Classification: Uses Llama.cpp to determine the type of meeting
+
+2. **Content Analysis:**
+   Based on the meeting type classification, the system generates structured summaries:
+   - Strategy Meetings: Key decisions, action items, and strategic initiatives
+   - Sales/Marketing/Product Calls: Customer details, pain points, and next steps
+   - R&D Brainstorms: Innovative ideas, technical challenges, resource requirements, and potential impacts
+
+## Graph Structure
+
+The project uses the following Indexify graph:
+
+```
+transcribe_audio -> classify_meeting_intent -> router -> summarize_strategy_meeting
+                                                      -> summarize_sales_call
+                                                      -> summarize_rd_brainstorm
+```

diff --git a/examples/conversation_extraction/main.py b/examples/conversation_extraction/main.py
index 8b6e28ec1..a2e69f412 100644
--- a/examples/conversation_extraction/main.py
+++ b/examples/conversation_extraction/main.py
@@ -298,7 +298,7 @@ def deploy_graphs(server_url: str):
     RemoteGraph.deploy(graph, server_url=server_url)
     logging.info("Graph deployed successfully")
 
-def run_workflow(mode: str, server_url: str = 'http://localhost:8950', bucket_name: str = None, file_key: str = None):
+def run_workflow(mode: str, server_url: str = 'http://localhost:8900', bucket_name: str = None, file_key: str = None):
     if mode == 'in-process-run':
         graph = create_graph()
     elif mode == 'remote-run':

From 24a267028b177eb32f5085460c5aedc4dccf55ba Mon Sep 17 00:00:00 2001
From: Ashish-Abraham
Date: Thu, 7 Nov 2024 23:45:11 +0530
Subject: [PATCH 09/10] typos and corrections

---
 examples/conversation_extraction/README.md |  4 +--
 examples/conversation_extraction/main.py   | 35 +++++++++++-----------
 2 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/examples/conversation_extraction/README.md b/examples/conversation_extraction/README.md
index d53188841..ae1717ab7 100644
--- a/examples/conversation_extraction/README.md
+++ b/examples/conversation_extraction/README.md
@@ -69,11 +69,11 @@ This project demonstrates how to build a meeting conversation extraction pipelin
    python main.py --mode remote-run
    ```
 
-## How it Works
+## Workflow
 
 1. **Audio Processing:**
    - Transcription: Converts speech to text using Faster Whisper, including speaker diarization
-   - Meeting Classification: Uses Llama.cpp to determine the type of meeting
+   - Meeting Classification: Uses LLM to determine the type of meeting
 
 2. **Content Analysis:**
    Based on the meeting type classification, the system generates structured summaries:
diff --git a/examples/conversation_extraction/main.py b/examples/conversation_extraction/main.py
index a2e69f412..50b9c9c75 100644
--- a/examples/conversation_extraction/main.py
+++ b/examples/conversation_extraction/main.py
@@ -77,6 +77,7 @@ class RDBrainstormSchema(BaseConversationSchema):
     .run("apt-get purge -y build-essential && apt-get autoremove -y && rm -rf /var/lib/apt/lists/*")
 )
 
+
 def get_chat_completion(prompt: str) -> str:
     model = Llama.from_pretrained(
         repo_id="NousResearch/Hermes-3-Llama-3.1-8B-GGUF",
@@ -96,7 +97,7 @@ def transcribe_audio(file: File) -> Transcription:
     audio_segments = [SpeechSegment(text=segment.text, start_ts=segment.start, end_ts=segment.end) for segment in segments]
     return Transcription(segments=audio_segments)
 
-@indexify_function(image=groq_image)
+@indexify_function(image=llama_cpp_image)
 def classify_meeting_intent(speech: Transcription) -> Transcription:
     try:
         """Classify the intent of the audio."""
@@ -112,7 +113,7 @@ def classify_meeting_intent(speech: Transcription) -> Transcription:
 
         Transcription:
         {transcription_text}
-        DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT! ONLY the Intent in the required format.
+        DO NOT ATTACH ANY OTHER PHRASES, symbols OR ANNOTATIONS WITH THE OUTPUT! ONLY the Intent in the required format.
         """
         response = get_chat_completion(prompt)
         intent = response.split(":")[-1].strip()
@@ -120,14 +121,14 @@ def classify_meeting_intent(speech: Transcription) -> Transcription:
             speech.classification = SpeechClassification(classification=intent, confidence=0.8)
         else:
             speech.classification = SpeechClassification(classification="unknown", confidence=0.5)
-        
+
         return speech
     except Exception as e:
         logging.error(f"Error classifying meeting intent: {e}")
         speech.classification = SpeechClassification(classification="unknown", confidence=0.5)
         return speech
 
-@indexify_function(image=groq_image)
+@indexify_function(image=llama_cpp_image)
 def summarize_strategy_meeting(speech: Transcription) -> StrategyMeetingSchema:
     """Summarize the strategy meeting."""
     transcription_text = "\n".join([segment.text for segment in speech.segments])
@@ -140,7 +141,7 @@ def summarize_strategy_meeting(speech: Transcription) -> StrategyMeetingSchema:
 
     Transcript:
     {transcription_text}
-    DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT!
+    DO NOT ATTACH ANY OTHER PHRASES, symbols OR ANNOTATIONS WITH THE OUTPUT!
     """
     summary = get_chat_completion(prompt)
     lines = summary.split('\n')
@@ -176,21 +177,20 @@ def summarize_strategy_meeting(speech: Transcription) -> StrategyMeetingSchema:
         action_items=action_items
     )
 
-@indexify_function(image=groq_image)
+@indexify_function(image=llama_cpp_image)
 def summarize_sales_call(speech: Transcription) -> SalesCallSchema:
     """Summarize the sales call according to SalesCallSchema."""
     transcription_text = "\n".join([segment.text for segment in speech.segments])
     prompt = f"""
     Analyze this sales call transcript and extract data ONLY in this format only:
-    1. Key details
-    2. Client concerns
-    3. Action items
-    4. Next steps
-    5. Recommendations for improving the approach
+    1. Customer Pain Points:
+    2. Proposed Solutions:
+    3. Objections:
+    4. Next Steps:
 
     Transcript:
     {transcription_text}
-    DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT!
+    DO NOT ATTACH ANY OTHER PHRASES, symbols OR ANNOTATIONS WITH THE OUTPUT!
     """
     summary = get_chat_completion(prompt)
     lines = summary.split('\n')
@@ -226,12 +226,12 @@ def summarize_sales_call(speech: Transcription) -> SalesCallSchema:
         next_steps=next_steps
     )
 
-@indexify_function(image=groq_image)
+@indexify_function(image=llama_cpp_image)
 def summarize_rd_brainstorm(speech: Transcription) -> RDBrainstormSchema:
     """Summarize the R&D brainstorming session according to RDBrainstormSchema."""
     transcription_text = "\n".join([segment.text for segment in speech.segments])
     prompt = f"""
-    Analyze this sales call transcript and extract data ONLY in this format only:
+    Analyze this R&D brainstorming transcript and extract data ONLY in this format only:
     1. Innovative Ideas:
     2. Technical Challenges:
     3. Resource Requirements:
@@ -239,7 +239,7 @@ def summarize_rd_brainstorm(speech: Transcription) -> RDBrainstormSchema:
 
     Transcript:
     {transcription_text}
-    DO NOT ATTATCH ANY OTHER PHRASES,* symbols OR ANNOTATIONS WITH THE OUTPUT!
+    DO NOT ATTACH ANY OTHER PHRASES, symbols OR ANNOTATIONS WITH THE OUTPUT!
     """
     summary = get_chat_completion(prompt)
     lines = summary.split('\n')
@@ -306,7 +306,7 @@ def run_workflow(mode: str, server_url: str = 'http://localhost:8900', bucket_na
     else:
         raise ValueError("Invalid mode. Choose 'in-process-run' or 'remote-run'.")
 
-    file_path = "data/audiofile" #replace
+    file_path = "data/audiofile" #replace with your file path
     with open(file_path, 'rb') as audio_file:
         audio_data = audio_file.read()
     file = File(path=file_path, data=audio_data)
@@ -337,6 +337,7 @@ def run_workflow(mode: str, server_url: str = 'http://localhost:8900', bucket_na
         logging.error(f"Error in workflow execution: {str(e)}")
         logging.error(f"Graph output for classify_meeting_intent: {graph.output(invocation_id=invocation_id, fn_name=classify_meeting_intent.name)}")
 
+
 if __name__ == "__main__":
     import argparse
     parser = argparse.ArgumentParser(description="Run Conversation Extractor")
@@ -362,5 +363,3 @@ def run_workflow(mode: str, server_url: str = 'http://localhost:8900', bucket_na
 
 
 
-
-

From cdaa1620801f50ccb293958f87e34dc938362101 Mon Sep 17 00:00:00 2001
From: Ashish-Abraham
Date: Sun, 10 Nov 2024 23:03:15 +0530
Subject: [PATCH 10/10] fixes and typos

---
 examples/conversation_extraction/main.py | 47 +++++++++++++++++++-----
 1 file changed, 38 insertions(+), 9 deletions(-)

diff --git a/examples/conversation_extraction/main.py b/examples/conversation_extraction/main.py
index 50b9c9c75..0e520b56b 100644
--- a/examples/conversation_extraction/main.py
+++ b/examples/conversation_extraction/main.py
@@ -11,7 +11,6 @@ import re
 import datetime
 from indexify import RemoteGraph
-from dotenv import load_dotenv
 import os
 from llama_cpp import Llama
 
@@ -24,7 +23,6 @@ class BaseConversationSchema(BaseModel):
     duration: int
     participants: List[str]
     meeting_type: str
-    summary: str
 
     class Config:
         arbitrary_types_allowed = True
@@ -77,7 +75,6 @@ class RDBrainstormSchema(BaseConversationSchema):
     .run("apt-get purge -y build-essential && apt-get autoremove -y && rm -rf /var/lib/apt/lists/*")
 )
 
-
 def get_chat_completion(prompt: str) -> str:
     model = Llama.from_pretrained(
         repo_id="NousResearch/Hermes-3-Llama-3.1-8B-GGUF",
@@ -306,7 +303,7 @@ def run_workflow(mode: str, server_url: str = 'http://localhost:8900', bucket_na
     else:
         raise ValueError("Invalid mode. Choose 'in-process-run' or 'remote-run'.")
 
-    file_path = "data/audiofile" #replace with your file path
+    file_path = "data/audiofile" #replace with your file path
     with open(file_path, 'rb') as audio_file:
         audio_data = audio_file.read()
     file = File(path=file_path, data=audio_data)
@@ -323,16 +320,48 @@ def run_workflow(mode: str, server_url: str = 'http://localhost:8900', bucket_na
         logging.info(f"Transcription Classification: {classification.classification}")
 
         if classification.classification == "strategy-meeting":
-            summary = graph.output(invocation_id=invocation_id, fn_name=summarize_strategy_meeting.name)[0]
+            result = graph.output(invocation_id=invocation_id, fn_name=summarize_strategy_meeting.name)[0]
+            logging.info("\nExtracted information:")
+            print(f"Meeting ID: {result.meeting_id}")
+            print(f"Date: {result.date.strftime('%Y-%m-%d %H:%M:%S')}")
+            print(f"Duration: {result.duration} seconds")
+            print(f"Participants: {repr(result.participants) if result.participants else 'None'}")
+            print(f"Meeting Type: {result.meeting_type}")
+            print(f"Key Decisions: {repr(result.key_decisions)}")
+            print(f"Risk Assessments: {repr(result.risk_assessments)}")
+            print(f"Strategic Initiatives: {repr(result.strategic_initiatives)}")
+            print(f"Action Items: {repr(result.action_items)}")
+
         elif classification.classification in ["sales-call", "marketing-call", "product-call"]:
-            summary = graph.output(invocation_id=invocation_id, fn_name=summarize_sales_call.name)[0]
+            result = graph.output(invocation_id=invocation_id, fn_name=summarize_sales_call.name)[0]
+            logging.info("\nExtracted information:")
+            print(f"Meeting ID: {result.meeting_id}")
+            print(f"Date: {result.date.strftime('%Y-%m-%d %H:%M:%S')}")
+            print(f"Duration: {result.duration} seconds")
+            print(f"Participants: {repr(result.participants) if result.participants else 'None'}")
+            print(f"Meeting Type: {result.meeting_type}")
+            print(f"Customer Pain Points: {repr(result.customer_pain_points)}")
+            print(f"Proposed Solutions: {repr(result.proposed_solutions)}")
+            print(f"Objections: {repr(result.objections)}")
+            print(f"Next Steps: {repr(result.next_steps)}")
+
         elif classification.classification == "rd-brainstorm":
-            summary = graph.output(invocation_id=invocation_id, fn_name=summarize_rd_brainstorm.name)[0]
+            result = graph.output(invocation_id=invocation_id, fn_name=summarize_rd_brainstorm.name)[0]
+            logging.info("\nExtracted information:")
+            print(f"Meeting ID: {result.meeting_id}")
+            print(f"Date: {result.date.strftime('%Y-%m-%d %H:%M:%S')}")
+            print(f"Duration: {result.duration} seconds")
+            print(f"Participants: {repr(result.participants) if result.participants else 'None'}")
+            print(f"Meeting Type: {result.meeting_type}")
+            print(f"Innovative Ideas: {repr(result.innovative_ideas)}")
+            print(f"Technical Challenges: {repr(result.technical_challenges)}")
+            print(f"Resource Requirements: {repr(result.resource_requirements)}")
+            print(f"Potential Impacts: {repr(result.potential_impacts)}")
+
         else:
             logging.warning(f"No suitable summarization found for the classification: {classification.classification}")
             return
-
-        logging.info(f"\nExtracted information:\n{summary.summary}")
+
     except Exception as e:
         logging.error(f"Error in workflow execution: {str(e)}")
         logging.error(f"Graph output for classify_meeting_intent: {graph.output(invocation_id=invocation_id, fn_name=classify_meeting_intent.name)}")
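
---

With the series applied, the example can also be driven from a separate client script rather than through main.py's argument parser. The sketch below is illustrative only: it assumes an Indexify server is reachable at http://localhost:8900, that the "Conversation_Extractor" graph defined in main.py has already been deployed (e.g. via `python main.py --mode remote-deploy`), and that each function's registered name matches its Python name, as the `fn_name=...name` lookups in main.py suggest. The audio path is a placeholder.

```python
# Minimal client sketch for the deployed Conversation_Extractor graph.
# Assumptions: server at http://localhost:8900, graph already deployed,
# and fn_name strings matching the decorated function names in main.py.
from indexify.functions_sdk.data_objects import File
from indexify.functions_sdk.graph import Graph

# Look up the deployed graph by name, as main.py's remote-run branch does.
graph = Graph.by_name("Conversation_Extractor", server_url="http://localhost:8900")

# Placeholder path: any speech recording the faster-whisper "base" model can handle.
with open("data/audiofile", "rb") as audio_file:
    audio = File(path="data/audiofile", data=audio_file.read())

# Blocks until transcription, intent classification, and the routed summarizer finish.
invocation_id = graph.run(block_until_done=True, file=audio)

# Inspect the intermediate classification; the routed summary can be fetched the
# same way with the matching summarize_* function name.
speech = graph.output(invocation_id=invocation_id, fn_name="classify_meeting_intent")[0]
print(speech.classification)
```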