Skip to content

Commit

Permalink
added classes to make it easier
Browse files Browse the repository at this point in the history
  • Loading branch information
tylerprogramming committed Jun 7, 2024
1 parent 4b06e93 commit 3b55ee5
Show file tree
Hide file tree
Showing 5 changed files with 150 additions and 74 deletions.
37 changes: 37 additions & 0 deletions _integrations/youtube-summarizer/AudioTranscriber.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import io
import time
from pydub import AudioSegment
import openai


class AudioTranscriber:
    """Transcribe an audio file through the OpenAI transcription endpoint.

    The audio is cut into fixed-length chunks, each chunk is re-encoded to
    MP3 in memory and sent to the API separately, and the resulting texts
    are joined into one transcription.
    """

    def __init__(self, api_key, model='whisper-1', chunk_duration=20,
                 max_attempts=3, retry_delay=5):
        """Create the API client and store the chunking/retry settings.

        Args:
            api_key: OpenAI API key handed to ``openai.OpenAI``.
            model: Name of the transcription model to request.
            chunk_duration: Length of each audio chunk, in minutes. May be
                fractional.
            max_attempts: How many times a single chunk is tried before it
                is given up on.
            retry_delay: Seconds to wait between two attempts.

        Raises:
            ValueError: If ``chunk_duration`` is not positive.
        """
        if chunk_duration <= 0:
            raise ValueError("chunk_duration must be a positive number of minutes.")
        self.client = openai.OpenAI(api_key=api_key)
        self.model = model
        # range() needs an integer, non-zero step, so coerce fractional
        # durations and never let the step collapse to 0.
        self.chunk_duration_ms = max(1, int(chunk_duration * 60 * 1000))
        self.max_attempts = max_attempts
        self.retry_delay = retry_delay

    def transcribe_audio(self, filename):
        """Transcribe the audio file at ``filename`` and return the full text.

        Chunks that could not be transcribed contribute nothing to the
        result; the remaining chunk texts are joined with a single space.
        """
        audio = AudioSegment.from_file(filename)
        parts = []
        for split_number, start in enumerate(range(0, len(audio), self.chunk_duration_ms)):
            chunk = audio[start:start + self.chunk_duration_ms]
            # Export straight into an in-memory buffer instead of reading
            # back (and never closing) the handle export() would return.
            file_obj = io.BytesIO()
            chunk.export(file_obj, format="mp3")
            file_obj.seek(0)
            # The name carries the extension along with the uploaded bytes.
            file_obj.name = f"part_{split_number}.mp3"
            text = self.transcribe_chunk(file_obj, split_number)
            if text:
                parts.append(text)
        # A separator keeps the last word of one chunk from fusing with the
        # first word of the next.
        return " ".join(parts)

    def transcribe_chunk(self, file_obj, split_number):
        """Transcribe one chunk, retrying on failure.

        Args:
            file_obj: Seekable binary file object holding the chunk audio.
            split_number: Zero-based index of the chunk (used for logging).

        Returns:
            The transcribed text, or ``""`` if every attempt failed.
        """
        print(f"Transcribing part {split_number + 1}!")
        for attempt in range(1, self.max_attempts + 1):
            # A failed attempt may already have consumed the stream; rewind
            # so a retry uploads the whole chunk rather than nothing.
            file_obj.seek(0)
            try:
                transcript = self.client.audio.transcriptions.create(
                    model=self.model, file=file_obj
                )
            except Exception as e:  # best effort: log, wait, try again
                print(f"Attempt {attempt} failed. Exception: {str(e)}")
                # No point in waiting once there is no attempt left.
                if attempt < self.max_attempts:
                    time.sleep(self.retry_delay)
            else:
                return transcript.text
        print(f"Failed to transcribe after {self.max_attempts} attempts.")
        return ""
52 changes: 52 additions & 0 deletions _integrations/youtube-summarizer/AutoGenInteraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import autogen


class AutoGenInteraction:
    """Run a two-step AutoGen conversation over a transcription.

    The first assistant is asked to summarize the transcription, the second
    one to translate that summary.
    """

    def __init__(self, api_key, transcription, model="gpt-3.5-turbo",
                 target_language="Dutch"):
        """Store the LLM configuration and the text to work on.

        Args:
            api_key: API key placed into the AutoGen ``config_list`` entry.
            transcription: Text that the summarizer agent is asked about.
            model: Model name used by both assistant agents.
            target_language: Language the second agent translates into.
        """
        self.llm_config = {
            "config_list": [{
                "model": model,
                "api_key": api_key
            }],
            "temperature": 0,
            "cache_seed": None
        }
        self.transcription = transcription
        self.target_language = target_language

    @staticmethod
    def _is_termination_msg(msg):
        """Return True when the message content contains ``TERMINATE``.

        A missing or ``None`` content is treated as empty instead of
        raising, so such a message simply does not terminate the chat.
        """
        return "TERMINATE" in (msg.get("content") or "")

    def initiate_chat(self):
        """Run the summarize-then-translate chat sequence.

        Returns:
            Whatever ``user.initiate_chats`` returns for the two chats.
        """
        user = autogen.UserProxyAgent(
            "user",
            system_message="You are an administrator.",
            human_input_mode="NEVER",
            is_termination_msg=self._is_termination_msg,
            code_execution_config=False,
        )

        agent_summarizer = autogen.AssistantAgent(
            "agent_summarizer",
            system_message="You are an assistant agent. When you are done, reply with TERMINATE",
            llm_config=self.llm_config
        )

        agent_translator = autogen.AssistantAgent(
            "ai_agent_translator",
            system_message="You are an assistant agent. When you are done, reply with TERMINATE",
            llm_config=self.llm_config
        )

        # Hand the results back so callers can use the summary/translation
        # instead of only seeing them printed.
        return user.initiate_chats(
            [
                {
                    "recipient": agent_summarizer,
                    "message": f"Can you summarize: {self.transcription} this nicely with the title and then bullet points?",
                    "clear_history": True,
                    "silent": False,
                    "summary_method": "last_msg",
                },
                {
                    "recipient": agent_translator,
                    "message": f"Can you translate this into {self.target_language}?",
                    "summary_method": "last_msg",
                },
            ]
        )
29 changes: 29 additions & 0 deletions _integrations/youtube-summarizer/YouTubeAudioDownloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import yt_dlp
import os


class YouTubeAudioDownloader:
    """Download the audio track of a video with yt-dlp and locate the file."""

    def __init__(self, url, output_template='%(title)s [%(id)s].%(ext)s',
                 preferred_codec='m4a'):
        """Store the URL and build the yt-dlp options.

        Args:
            url: Address of the video to download.
            output_template: yt-dlp ``outtmpl`` for the output file name.
            preferred_codec: Codec requested from the ``FFmpegExtractAudio``
                post-processor.
        """
        self.url = url
        self.preferred_codec = preferred_codec
        self.ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': output_template,
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': preferred_codec,
            }]
        }

    def download_audio(self):
        """Download the audio and return the path of the resulting file.

        Raises:
            FileNotFoundError: If no matching file is found afterwards.
        """
        with yt_dlp.YoutubeDL(self.ydl_opts) as ydl:
            info_dict = ydl.extract_info(self.url, download=True)
            filename = ydl.prepare_filename(info_dict)
        # The name from prepare_filename may carry a different extension
        # than the post-processed file, so look the real file up on disk.
        return self.find_file_in_directory(filename, self.preferred_codec)

    @staticmethod
    def find_file_in_directory(base_filename, preferred_ext=None):
        """Find the file on disk that shares ``base_filename``'s stem.

        The search happens in the directory part of ``base_filename`` (the
        current directory when there is none).

        Args:
            base_filename: Expected file path; its extension is ignored.
            preferred_ext: Extension to prefer (with or without the leading
                dot). When the stem with that extension exists it is
                returned directly.

        Returns:
            The matching path, using the same directory prefix as
            ``base_filename`` (a bare file name when it had none).

        Raises:
            FileNotFoundError: If nothing matching the stem is found.
        """
        directory, name = os.path.split(base_filename)
        stem, _ = os.path.splitext(name)
        search_dir = directory or '.'

        if preferred_ext:
            expected = f"{stem}.{preferred_ext.lstrip('.')}"
            if os.path.isfile(os.path.join(search_dir, expected)):
                return os.path.join(directory, expected)

        # Skip names ending in .part/.ytdl (assumed to be yt-dlp temporary
        # artefacts) and sort, because os.listdir order is arbitrary.
        temp_suffixes = ('.part', '.ytdl')
        candidates = sorted(
            entry for entry in os.listdir(search_dir)
            if entry.startswith(stem) and not entry.endswith(temp_suffixes)
        )
        if not candidates:
            raise FileNotFoundError(
                f"No file matching {base_filename} was found in directory '{search_dir}'."
            )
        return os.path.join(directory, candidates[0])
32 changes: 32 additions & 0 deletions _integrations/youtube-summarizer/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import os
from dotenv import load_dotenv

from YouTubeAudioDownloader import YouTubeAudioDownloader
from AudioTranscriber import AudioTranscriber
from AutoGenInteraction import AutoGenInteraction

load_dotenv()


def main(url='https://www.youtube.com/watch?v=WqIzUopTPvU'):
    """Download a video's audio, transcribe it, then summarize and translate it.

    Args:
        url: Address of the video to process.

    Raises:
        RuntimeError: If the ``OPENAI_API_KEY`` environment variable is not
            set or is empty.
    """
    openai_api_key = os.getenv("OPENAI_API_KEY")
    # Fail before the download starts rather than after it has finished.
    if not openai_api_key:
        raise RuntimeError("OPENAI_API_KEY is not set; add it to the environment or a .env file.")

    # gives you the filename from yt video
    downloader = YouTubeAudioDownloader(url)
    filename = downloader.download_audio()
    print(f"Downloaded and saved audio (m4a) as: {filename}")

    # transcribes the audio
    transcriber = AudioTranscriber(api_key=openai_api_key)
    transcription = transcriber.transcribe_audio(filename)
    if not transcription:
        # Nothing to summarize, so do not spend LLM calls on empty input.
        print("Transcription is empty; skipping the chat.")
        return
    print("Transcription completed.")

    # ai agents summarize/translate
    autogen_interaction = AutoGenInteraction(api_key=openai_api_key, transcription=transcription)
    autogen_interaction.initiate_chat()
    print("Chat completed.")


if __name__ == "__main__":
main()
74 changes: 0 additions & 74 deletions _integrations/youtube-summarizer/youtube.py

This file was deleted.

0 comments on commit 3b55ee5

Please sign in to comment.