-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathview_writeups.py
81 lines (54 loc) · 2.92 KB
/
view_writeups.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import os
from pathlib import Path
import logging
logging.basicConfig(level=logging.DEBUG)
import json
import streamlit as st
# All data paths are resolved against the directory the app was launched from.
ROOT_DIR = os.getcwd()

# Mapping keyed by presentation folder name; it is consulted later with a
# membership test before the page title is rendered, so an empty mapping
# simply means "no title".
try:
    with open('team_to_topic.json', 'r', encoding='utf-8') as f:
        team_to_topic = json.load(f)
except (OSError, ValueError) as exc:
    # OSError: file missing or unreadable. ValueError: covers both
    # json.JSONDecodeError and UnicodeDecodeError. Fall back to an empty
    # mapping instead of taking the whole page down with a traceback.
    print(f'E: Could not load team_to_topic.json: {exc}')
    st.warning("Warning: could not load team_to_topic.json; titles will be omitted.")
    team_to_topic = {}

transcripts_path = Path(ROOT_DIR) / 'transcripts'

# is_dir() rather than exists(): a regular file that happens to be named
# 'transcripts' would pass exists() and then fail when it is listed.
if not transcripts_path.is_dir():
    print('E: Transcripts directory not found!')
    st.error("Error: Transcripts directory not found!")
    st.stop()
_TRANSCRIPT_SUFFIX = '_transcript.txt'

# Prefixes of the files offered in the sidebar, in display order. The broad
# 'summary_' entry comes after the two specific 'summary_rewrite_gpt-*'
# entries so that those are ranked first; 'rewrite_gpt-*' files come last.
_DISPLAY_PREFIXES = (
    'summary_rewrite_gpt-4',
    'summary_rewrite_gpt-3.5',
    'summary_',
    'rewrite_gpt-',
)


def _visible_subdirs(parent):
    """Return the sorted names of the sub-directories of *parent*.

    '.DS_Store' and any other non-directory entry are skipped, because each
    returned name is subsequently listed as a directory.
    """
    return sorted(
        entry for entry in os.listdir(parent)
        if entry != '.DS_Store' and (Path(parent) / entry).is_dir()
    )


def _display_rank(name):
    """Return the index of the first display prefix that *name* starts with."""
    for rank, prefix in enumerate(_DISPLAY_PREFIXES):
        if name.startswith(prefix):
            return rank
    return len(_DISPLAY_PREFIXES)


years = _visible_subdirs(transcripts_path)
if not years:
    # With no options the selectbox yields None, which cannot be joined
    # onto a path; stop with a readable message instead.
    st.error("Error: No year folders found in the transcripts directory!")
    st.stop()
selected_year = st.sidebar.selectbox('Select a year', years)

presentations = _visible_subdirs(transcripts_path / selected_year)
if not presentations:
    st.error(f"Error: No presentations found for {selected_year}!")
    st.stop()
selected_presentation = st.sidebar.selectbox('Select a presentation', presentations)

files = os.listdir(transcripts_path / selected_year / selected_presentation)

# The transcript is named '<video id>_transcript.txt'; the video id is used
# to build the YouTube URL. A presentation without a transcript gets a
# warning instead of an IndexError.
transcript_file = next(
    (name for name in files if name.endswith(_TRANSCRIPT_SUFFIX)),
    None,
)
if transcript_file is None:
    video_id = None
    youtube_url = None
else:
    video_id = transcript_file[:-len(_TRANSCRIPT_SUFFIX)]
    youtube_url = f"https://www.youtube.com/watch?v={video_id}"

# Keep only 'summary_*' (which includes 'summary_rewrite_*') and
# 'rewrite_gpt-*' files, ordered: 'summary_rewrite_gpt-4*', then
# 'summary_rewrite_gpt-3.5*', then the remaining 'summary_*', then
# 'rewrite_gpt-*'. Names within a group are sorted alphabetically so the
# list does not depend on directory listing order.
files = sorted(
    (name for name in files if name.startswith(_DISPLAY_PREFIXES)),
    key=lambda name: (_display_rank(name), name),
)

if youtube_url is not None:
    st.video(youtube_url)
else:
    st.warning("Warning: No transcript file found for this presentation; video unavailable.")

selected_file = st.sidebar.selectbox('Select a file', files)
# Text of the selected write-up; stays None when there is nothing to show so
# that an error is reported once (via st.error) rather than also being
# rendered as the page body.
file_content = None

if selected_file is None:
    # The file selectbox yields None when it was given no options.
    message = "Error: No summary files found for this presentation!"
    st.error(message)
    print(message)
else:
    file_path = Path(ROOT_DIR) / 'transcripts' / selected_year / selected_presentation / selected_file
    if file_path.is_file():
        with open(file_path, encoding='utf-8') as f:
            file_content = f.read()
    else:
        message = f"Error: File not found: {selected_file}"
        st.error(message)
        print(message)

# The centred title is shown only for presentations present in the mapping.
# NOTE(review): the topic text is interpolated into raw HTML unescaped;
# confirm team_to_topic.json is trusted content.
if selected_presentation in team_to_topic:
    title = f'[[{team_to_topic[selected_presentation]}]]'
    st.markdown(f"<h1 style='text-align: center;'>{title}</h1>", unsafe_allow_html=True)

st.markdown("[See LDA Visualization](https://davidrd123.github.io/Launch-Summarize-Capstone-YT/index.html)")
st.markdown("[Semantic Search over the Videos](https://launch-semantic-search-capstone-yt.vercel.app/)")

if file_content is not None:
    st.markdown(file_content)
# import streamlit.components.v1 as components
# with open("lda.html", 'r') as f:
# html_string = f.read()
# print(html_string[:100])
# components.html(html_string, height=600)