Rewrote the system for getting YouTube transcripts
This commit is contained in:
		
							parent
							
								
									134a907eff
								
							
						
					
					
						commit
						218c10f4ad
					
				| @ -111,6 +111,45 @@ | ||||
| 		} | ||||
| 	    ] | ||||
| 	}, | ||||
| 	{ | ||||
| 	    "name": "python3-youtube-transcript-api", | ||||
| 	    "buildsystem": "simple", | ||||
| 	    "build-commands": [ | ||||
| 		"pip3 install --verbose --exists-action=i --no-index --find-links=\"file://${PWD}\" --prefix=${FLATPAK_DEST} \"youtube-transcript-api\" --no-build-isolation" | ||||
| 	    ], | ||||
| 	    "sources": [ | ||||
| 		{ | ||||
| 		    "type": "file", | ||||
| 		    "url": "https://files.pythonhosted.org/packages/12/90/3c9ff0512038035f59d279fddeb79f5f1eccd8859f06d6163c58798b9487/certifi-2024.8.30-py3-none-any.whl", | ||||
| 		    "sha256": "922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8" | ||||
| 		}, | ||||
| 		{ | ||||
| 		    "type": "file", | ||||
| 		    "url": "https://files.pythonhosted.org/packages/f2/4f/e1808dc01273379acc506d18f1504eb2d299bd4131743b9fc54d7be4df1e/charset_normalizer-3.4.0.tar.gz", | ||||
| 		    "sha256": "223217c3d4f82c3ac5e29032b3f1c2eb0fb591b72161f86d93f5719079dae93e" | ||||
| 		}, | ||||
| 		{ | ||||
| 		    "type": "file", | ||||
| 		    "url": "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", | ||||
| 		    "sha256": "946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3" | ||||
| 		}, | ||||
| 		{ | ||||
| 		    "type": "file", | ||||
| 		    "url": "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", | ||||
| 		    "sha256": "70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6" | ||||
| 		}, | ||||
| 		{ | ||||
| 		    "type": "file", | ||||
| 		    "url": "https://files.pythonhosted.org/packages/ce/d9/5f4c13cecde62396b0d3fe530a50ccea91e7dfc1ccf0e09c228841bb5ba8/urllib3-2.2.3-py3-none-any.whl", | ||||
| 		    "sha256": "ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac" | ||||
| 		}, | ||||
| 		{ | ||||
| 		    "type": "file", | ||||
| 		    "url": "https://files.pythonhosted.org/packages/52/42/5f57d37d56bdb09722f226ed81cc1bec63942da745aa27266b16b0e16a5d/youtube_transcript_api-0.6.2-py3-none-any.whl", | ||||
| 		    "sha256": "019dbf265c6a68a0591c513fff25ed5a116ce6525832aefdfb34d4df5567121c" | ||||
| 		} | ||||
| 	    ] | ||||
| 	}, | ||||
| 	{ | ||||
| 	    "name": "python3-html2text", | ||||
| 	    "buildsystem": "simple", | ||||
|  | ||||
| @ -4,7 +4,7 @@ Working on organizing the code | ||||
| """ | ||||
| 
 | ||||
| import os, requests | ||||
| from pytube import YouTube | ||||
| from youtube_transcript_api import YouTubeTranscriptApi | ||||
| from html2text import html2text | ||||
| from .internal import cache_dir | ||||
| 
 | ||||
| @ -18,25 +18,35 @@ def connect_remote(remote_url:str, bearer_token:str): | ||||
|     window.model_manager.update_local_list() | ||||
|     window.save_server_config() | ||||
| 
 | ||||
| def attach_youtube(video_url:str, caption_name:str): | ||||
| def attach_youtube(video_title:str, video_author:str, watch_url:str, video_url:str, video_id:str, caption_name:str): | ||||
|     buffer = window.message_text_view.get_buffer() | ||||
|     text = buffer.get_text(buffer.get_start_iter(), buffer.get_end_iter(), False).replace(video_url, "") | ||||
|     buffer.delete(buffer.get_start_iter(), buffer.get_end_iter()) | ||||
|     buffer.insert(buffer.get_start_iter(), text, len(text)) | ||||
| 
 | ||||
|     yt = YouTube(video_url) | ||||
|     text = "{}\n{}\n{}\n\n".format(yt.title, yt.author, yt.watch_url) | ||||
|     result_text = "{}\n{}\n{}\n\n".format(video_title, video_author, watch_url) | ||||
|     caption_name = caption_name.split(' (')[-1][:-1] | ||||
| 
 | ||||
|     if caption_name.startswith('Translate:'): | ||||
|         original_caption_name = get_youtube_transcripts(video_id)[0].split(' (')[-1][:-1] | ||||
|         transcript = YouTubeTranscriptApi.list_transcripts(video_id).find_transcript([original_caption_name]).translate(caption_name.split(':')[-1]).fetch() | ||||
|         result_text += '(Auto translated from Japanese)\n' | ||||
|     else: | ||||
|         transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[caption_name]) | ||||
| 
 | ||||
|     result_text += '\n'.join([t['text'] for t in transcript]) | ||||
| 
 | ||||
|     for event in yt.captions[caption_name.split('(')[-1][:-1]].json_captions['events']: | ||||
|         text += "{}\n".format(event['segs'][0]['utf8'].replace('\n', '\\n')) | ||||
|     if not os.path.exists(os.path.join(cache_dir, 'tmp/youtube')): | ||||
|         os.makedirs(os.path.join(cache_dir, 'tmp/youtube')) | ||||
|     file_path = os.path.join(os.path.join(cache_dir, 'tmp/youtube'), f'{yt.title} ({caption_name.split(" (")[0]})') | ||||
|     file_path = os.path.join(os.path.join(cache_dir, 'tmp/youtube'), '{} ({})'.format(video_title.replace('/', ' '), caption_name)) | ||||
|     with open(file_path, 'w+', encoding="utf-8") as f: | ||||
|         f.write(text) | ||||
|         f.write(result_text) | ||||
| 
 | ||||
|     window.attach_file(file_path, 'youtube') | ||||
| 
 | ||||
| def get_youtube_transcripts(video_id:str): | ||||
|     return ['{} ({})'.format(t.language, t.language_code) for t in YouTubeTranscriptApi.list_transcripts(video_id)] | ||||
| 
 | ||||
| def attach_website(url:str): | ||||
|     response = requests.get(url) | ||||
|     if response.status_code == 200: | ||||
|  | ||||
| @ -69,7 +69,7 @@ class AlpacaWindow(Adw.ApplicationWindow): | ||||
|     preferences_dialog = Gtk.Template.Child() | ||||
|     shortcut_window : Gtk.ShortcutsWindow  = Gtk.Template.Child() | ||||
|     file_preview_dialog = Gtk.Template.Child() | ||||
|     file_preview_text_view = Gtk.Template.Child() | ||||
|     file_preview_text_label = Gtk.Template.Child() | ||||
|     file_preview_image = Gtk.Template.Child() | ||||
|     welcome_dialog = Gtk.Template.Child() | ||||
|     welcome_carousel = Gtk.Template.Child() | ||||
| @ -405,7 +405,7 @@ class AlpacaWindow(Adw.ApplicationWindow): | ||||
|         if content: | ||||
|             if file_type == 'image': | ||||
|                 self.file_preview_image.set_visible(True) | ||||
|                 self.file_preview_text_view.set_visible(False) | ||||
|                 self.file_preview_text_label.set_visible(False) | ||||
|                 image_data = base64.b64decode(content) | ||||
|                 loader = GdkPixbuf.PixbufLoader.new() | ||||
|                 loader.write(image_data) | ||||
| @ -418,10 +418,8 @@ class AlpacaWindow(Adw.ApplicationWindow): | ||||
|                 self.file_preview_open_button.set_name(file_path) | ||||
|             else: | ||||
|                 self.file_preview_image.set_visible(False) | ||||
|                 self.file_preview_text_view.set_visible(True) | ||||
|                 buffer = self.file_preview_text_view.get_buffer() | ||||
|                 buffer.delete(buffer.get_start_iter(), buffer.get_end_iter()) | ||||
|                 buffer.insert(buffer.get_start_iter(), content, len(content.encode('utf-8'))) | ||||
|                 self.file_preview_text_label.set_visible(True) | ||||
|                 buffer = self.file_preview_text_label.set_label(content) | ||||
|                 if file_type == 'youtube': | ||||
|                     self.file_preview_dialog.set_title(content.split('\n')[0]) | ||||
|                     self.file_preview_open_button.set_name(content.split('\n')[2]) | ||||
| @ -760,20 +758,23 @@ Generate a title following these rules: | ||||
|             if youtube_regex.match(text): | ||||
|                 try: | ||||
|                     yt = YouTube(text) | ||||
|                     captions = yt.captions | ||||
|                     if len(captions) == 0: | ||||
|                     transcriptions = generic_actions.get_youtube_transcripts(yt.video_id) | ||||
|                     if len(transcriptions) == 0: | ||||
|                         self.show_toast(_("This video does not have any transcriptions"), self.main_overlay) | ||||
|                         return | ||||
|                     video_title = yt.title | ||||
| 
 | ||||
|                     if not any(filter(lambda x: '(en' in x, transcriptions)): | ||||
|                         transcriptions.insert(0, 'English (Translate:en)') | ||||
| 
 | ||||
|                     dialog_widget.simple_dropdown( | ||||
|                         _('Attach YouTube Video?'), | ||||
|                         _('{}\n\nPlease select a transcript to include').format(video_title), | ||||
|                         lambda caption_name, video_url=text: generic_actions.attach_youtube(video_url, caption_name), | ||||
|                         ["{} ({})".format(caption.name.title(), caption.code) for caption in captions] | ||||
|                         _('{}\n\nPlease select a transcript to include').format(yt.streams[0].title), | ||||
|                         lambda caption_name, yt=yt, video_url=text: generic_actions.attach_youtube(yt.streams[0].title, yt.author, yt.watch_url, video_url, yt.video_id, caption_name), | ||||
|                         transcriptions | ||||
|                     ) | ||||
|                 except Exception as e: | ||||
|                     logger.error(e) | ||||
|                     self.show_toast(_("This video is not available"), self.main_overlay) | ||||
|                     self.show_toast(_("Error attaching video, please try again"), self.main_overlay) | ||||
|             elif url_regex.match(text): | ||||
|                 dialog_widget.simple( | ||||
|                     _('Attach Website? (Experimental)'), | ||||
|  | ||||
| @ -884,14 +884,13 @@ | ||||
|               <child> | ||||
|                 <object class="GtkBox"> | ||||
|                   <child> | ||||
|                     <object class="GtkTextView" id="file_preview_text_view"> | ||||
|                     <object class="GtkLabel" id="file_preview_text_label"> | ||||
|                       <property name="margin-top">12</property> | ||||
|                       <property name="margin-bottom">12</property> | ||||
|                       <property name="margin-start">12</property> | ||||
|                       <property name="margin-end">12</property> | ||||
|                       <property name="hexpand">true</property> | ||||
|                       <property name="vexpand">true</property> | ||||
|                       <property name="editable">false</property> | ||||
|                       <property name="selectable">true</property> | ||||
|                     </object> | ||||
|                   </child> | ||||
|                   <child> | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user