Rewrote system for getting youtube transcripts
This commit is contained in:
@@ -4,7 +4,7 @@ Working on organizing the code
|
||||
"""
|
||||
|
||||
import os, requests
|
||||
from pytube import YouTube
|
||||
from youtube_transcript_api import YouTubeTranscriptApi
|
||||
from html2text import html2text
|
||||
from .internal import cache_dir
|
||||
|
||||
@@ -18,25 +18,35 @@ def connect_remote(remote_url:str, bearer_token:str):
|
||||
window.model_manager.update_local_list()
|
||||
window.save_server_config()
|
||||
|
||||
def attach_youtube(video_title:str, video_author:str, watch_url:str, video_url:str, video_id:str, caption_name:str):
    """Attach the transcript of a YouTube video to the current message.

    The pasted link is removed from the message box, the selected transcript
    is downloaded (or auto-translated when a 'Translate:<code>' option was
    chosen), written to a file under the cache directory and handed to the
    window as a 'youtube' attachment.

    Args:
        video_title: Title of the video; first line of the attachment and
            part of the file name.
        video_author: Channel/author name; second line of the attachment.
        watch_url: Canonical watch URL; third line of the attachment.
        video_url: The URL text as typed by the user, stripped from the
            message box.
        video_id: Video id passed to YouTubeTranscriptApi.
        caption_name: Dropdown label in the form 'Language (code)' or
            'Language (Translate:code)'.
    """
    # Strip the pasted URL out of the message entry.
    buffer = window.message_text_view.get_buffer()
    text = buffer.get_text(buffer.get_start_iter(), buffer.get_end_iter(), False).replace(video_url, "")
    buffer.delete(buffer.get_start_iter(), buffer.get_end_iter())
    # The length argument is a byte count, so measure the UTF-8 encoding
    # (a character count would truncate non-ASCII text).
    buffer.insert(buffer.get_start_iter(), text, len(text.encode('utf-8')))

    result_text = "{}\n{}\n{}\n\n".format(video_title, video_author, watch_url)
    # 'English (en)' -> 'en', 'English (Translate:en)' -> 'Translate:en'
    caption_name = caption_name.split(' (')[-1][:-1]

    if caption_name.startswith('Translate:'):
        # Translate from the first transcript the video offers.
        original_caption_name = get_youtube_transcripts(video_id)[0].split(' (')[-1][:-1]
        source = YouTubeTranscriptApi.list_transcripts(video_id).find_transcript([original_caption_name])
        transcript = source.translate(caption_name.split(':')[-1]).fetch()
        # Report the real source language instead of a hard-coded one.
        result_text += '(Auto translated from {})\n'.format(source.language)
    else:
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[caption_name])

    result_text += '\n'.join([t['text'] for t in transcript])

    youtube_dir = os.path.join(cache_dir, 'tmp/youtube')
    os.makedirs(youtube_dir, exist_ok=True)
    # '/' in a title would otherwise be interpreted as a path separator.
    file_path = os.path.join(youtube_dir, '{} ({})'.format(video_title.replace('/', ' '), caption_name))
    with open(file_path, 'w+', encoding="utf-8") as f:
        f.write(result_text)

    window.attach_file(file_path, 'youtube')
|
||||
|
||||
def get_youtube_transcripts(video_id:str):
    """Return one 'Language (code)' label per transcript listed for the video.

    Labels follow the order in which YouTubeTranscriptApi.list_transcripts
    yields the transcripts for ``video_id``.
    """
    labels = []
    for entry in YouTubeTranscriptApi.list_transcripts(video_id):
        labels.append('{} ({})'.format(entry.language, entry.language_code))
    return labels
|
||||
|
||||
def attach_website(url:str):
|
||||
response = requests.get(url)
|
||||
if response.status_code == 200:
|
||||
|
||||
@@ -69,7 +69,7 @@ class AlpacaWindow(Adw.ApplicationWindow):
|
||||
preferences_dialog = Gtk.Template.Child()
|
||||
shortcut_window : Gtk.ShortcutsWindow = Gtk.Template.Child()
|
||||
file_preview_dialog = Gtk.Template.Child()
|
||||
file_preview_text_view = Gtk.Template.Child()
|
||||
file_preview_text_label = Gtk.Template.Child()
|
||||
file_preview_image = Gtk.Template.Child()
|
||||
welcome_dialog = Gtk.Template.Child()
|
||||
welcome_carousel = Gtk.Template.Child()
|
||||
@@ -405,7 +405,7 @@ class AlpacaWindow(Adw.ApplicationWindow):
|
||||
if content:
|
||||
if file_type == 'image':
|
||||
self.file_preview_image.set_visible(True)
|
||||
self.file_preview_text_view.set_visible(False)
|
||||
self.file_preview_text_label.set_visible(False)
|
||||
image_data = base64.b64decode(content)
|
||||
loader = GdkPixbuf.PixbufLoader.new()
|
||||
loader.write(image_data)
|
||||
@@ -418,10 +418,8 @@ class AlpacaWindow(Adw.ApplicationWindow):
|
||||
self.file_preview_open_button.set_name(file_path)
|
||||
else:
|
||||
self.file_preview_image.set_visible(False)
|
||||
self.file_preview_text_view.set_visible(True)
|
||||
buffer = self.file_preview_text_view.get_buffer()
|
||||
buffer.delete(buffer.get_start_iter(), buffer.get_end_iter())
|
||||
buffer.insert(buffer.get_start_iter(), content, len(content.encode('utf-8')))
|
||||
self.file_preview_text_label.set_visible(True)
|
||||
buffer = self.file_preview_text_label.set_label(content)
|
||||
if file_type == 'youtube':
|
||||
self.file_preview_dialog.set_title(content.split('\n')[0])
|
||||
self.file_preview_open_button.set_name(content.split('\n')[2])
|
||||
@@ -760,20 +758,23 @@ Generate a title following these rules:
|
||||
if youtube_regex.match(text):
|
||||
try:
|
||||
yt = YouTube(text)
|
||||
captions = yt.captions
|
||||
if len(captions) == 0:
|
||||
transcriptions = generic_actions.get_youtube_transcripts(yt.video_id)
|
||||
if len(transcriptions) == 0:
|
||||
self.show_toast(_("This video does not have any transcriptions"), self.main_overlay)
|
||||
return
|
||||
video_title = yt.title
|
||||
|
||||
if not any(filter(lambda x: '(en' in x, transcriptions)):
|
||||
transcriptions.insert(0, 'English (Translate:en)')
|
||||
|
||||
dialog_widget.simple_dropdown(
|
||||
_('Attach YouTube Video?'),
|
||||
_('{}\n\nPlease select a transcript to include').format(video_title),
|
||||
lambda caption_name, video_url=text: generic_actions.attach_youtube(video_url, caption_name),
|
||||
["{} ({})".format(caption.name.title(), caption.code) for caption in captions]
|
||||
_('{}\n\nPlease select a transcript to include').format(yt.streams[0].title),
|
||||
lambda caption_name, yt=yt, video_url=text: generic_actions.attach_youtube(yt.streams[0].title, yt.author, yt.watch_url, video_url, yt.video_id, caption_name),
|
||||
transcriptions
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
self.show_toast(_("This video is not available"), self.main_overlay)
|
||||
self.show_toast(_("Error attaching video, please try again"), self.main_overlay)
|
||||
elif url_regex.match(text):
|
||||
dialog_widget.simple(
|
||||
_('Attach Website? (Experimental)'),
|
||||
|
||||
@@ -884,14 +884,13 @@
|
||||
<child>
|
||||
<object class="GtkBox">
|
||||
<child>
|
||||
<object class="GtkTextView" id="file_preview_text_view">
|
||||
<object class="GtkLabel" id="file_preview_text_label">
|
||||
<property name="margin-top">12</property>
|
||||
<property name="margin-bottom">12</property>
|
||||
<property name="margin-start">12</property>
|
||||
<property name="margin-end">12</property>
|
||||
<property name="hexpand">true</property>
|
||||
<property name="vexpand">true</property>
|
||||
<property name="editable">false</property>
|
||||
<property name="selectable">true</property>
|
||||
</object>
|
||||
</child>
|
||||
<child>
|
||||
|
||||
Reference in New Issue
Block a user