From d510b74d3d70b495e670ffb0d604c68a15925bb8 Mon Sep 17 00:00:00 2001 From: mesonium Date: Wed, 12 Jun 2024 19:57:25 +0200 Subject: [PATCH] First steps to multi model plugin --- stt_voice_messages/config_dialog.py | 63 ---- stt_voice_messages/gtk/__init__.py | 0 stt_voice_messages/gtk/config_dialog.py | 185 ++++++++++++ stt_voice_messages/gtk/config_dialog.ui | 271 ++++++++++++++++++ stt_voice_messages/gtk/sttbox.py | 65 +++++ stt_voice_messages/helper.py | 65 +++++ stt_voice_messages/models/__init__.py | 0 stt_voice_messages/models/openai_whisper.py | 92 ++++++ stt_voice_messages/plugin-manifest.json | 2 +- ...ice_message.png => stt_voice_messages.png} | Bin stt_voice_messages/stt_voice_messages.py | 119 +------- 11 files changed, 690 insertions(+), 172 deletions(-) delete mode 100644 stt_voice_messages/config_dialog.py create mode 100644 stt_voice_messages/gtk/__init__.py create mode 100644 stt_voice_messages/gtk/config_dialog.py create mode 100644 stt_voice_messages/gtk/config_dialog.ui create mode 100644 stt_voice_messages/gtk/sttbox.py create mode 100644 stt_voice_messages/helper.py create mode 100644 stt_voice_messages/models/__init__.py create mode 100644 stt_voice_messages/models/openai_whisper.py rename stt_voice_messages/{stt_voice_message.png => stt_voice_messages.png} (100%) diff --git a/stt_voice_messages/config_dialog.py b/stt_voice_messages/config_dialog.py deleted file mode 100644 index 14eef6b..0000000 --- a/stt_voice_messages/config_dialog.py +++ /dev/null @@ -1,63 +0,0 @@ -# This file is part of Gajim. -# -# Gajim is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Gajim is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Gajim. If not, see . - -from __future__ import annotations - -from typing import Any -from typing import TYPE_CHECKING - -from gi.repository import Gtk - -from gajim.plugins.plugins_i18n import _ - -from gajim.gtk.settings import SettingsDialog -from gajim.gtk.const import Setting -from gajim.gtk.const import SettingKind -from gajim.gtk.const import SettingType - -if TYPE_CHECKING: - from .stt_voice_messages import STTVoiceMessagesPlugin - - -class STTVoiceMessagesConfigDialog(SettingsDialog): - def __init__(self, plugin: STTVoiceMessagesPlugin, parent: Gtk.Window) -> None: - - type_values = ('tiny', 'base', 'small', 'medium', 'large') - - self.plugin = plugin - settings = [ - Setting(SettingKind.COMBO, - _('Language Model'), - SettingType.VALUE, - callback=self._on_setting, - props={'combo_items': type_values}), - - Setting(SettingKind.SWITCH, - _('Use Multilanguage Model'), - SettingType.VALUE, - self.plugin.config['use_multilanguage_model'], - callback=self._on_setting, - data='use_multilanguage_model'), - ] - - SettingsDialog.__init__(self, - parent, - _('STT Voice Message Configuration'), - Gtk.DialogFlags.MODAL, - settings, - '') - - def _on_setting(self, value: Any, data: Any) -> None: - self.plugin.config[data] = value diff --git a/stt_voice_messages/gtk/__init__.py b/stt_voice_messages/gtk/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/stt_voice_messages/gtk/config_dialog.py b/stt_voice_messages/gtk/config_dialog.py new file mode 100644 index 0000000..5164f21 --- /dev/null +++ b/stt_voice_messages/gtk/config_dialog.py @@ -0,0 +1,185 @@ +# This file is part of Gajim. 
#
# Gajim is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Gajim is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import logging
from pathlib import Path
from typing import TYPE_CHECKING, Any

from gi.repository import Gtk

from gajim.common import app
from gajim.gtk.builder import get_builder
from gajim.gtk.const import Setting, SettingKind, SettingType
from gajim.gtk.settings import SettingsBox
from gajim.gtk.sidebar_switcher import SideBarSwitcher
# NOTE(review): this import rebinds ``get_builder``; the plugin helper is the
# one actually called below, the ``gajim.gtk.builder`` one above is shadowed.
from gajim.plugins.helpers import get_builder
from gajim.plugins.plugins_i18n import _

from .. import stt_voice_messages
from ..models import openai_whisper

if TYPE_CHECKING:
    from .. import stt_voice_messages

log = logging.getLogger('gajim.p.stt_voice_messages_config')


################################################################################
# Helper
################################################################################
def check_module(module: str) -> bool:
    '''
    Return True if ``module`` can be imported, False otherwise.

    The module is really imported (not merely located), so any import-time
    side effects of ``module`` happen here. Failures are logged at debug
    level and never propagate to the caller.
    '''
    try:
        __import__(module)
    except ModuleNotFoundError:
        log.debug('Could not find module %s', module)
        return False
    except ImportError as ex:
        # The module exists but failed while importing (e.g. a broken
        # dependency); lazy %-formatting avoids building the string when
        # debug logging is disabled.
        log.debug('%s', ex)
        return False
    return True


def _placeholder_settings() -> list[Setting]:
    '''
    Build the single placeholder setting shared by the stub preference boxes.

    ``STTBehaviour``, ``Models`` and ``FilePreview`` all carried an identical
    copy of this list; it is built in one place until the boxes get settings
    of their own.
    NOTE(review): the setting uses ``SettingType.CONFIG`` with the key
    'show_main_window_on_startup', which looks like one of Gajim's own
    application settings rather than a plugin option - confirm before release.
    '''
    entries = {
        'always': _('Always'),
        'never': _('Never'),
        'last_state': _('Restore last state'),
    }
    return [
        Setting(SettingKind.POPOVER,
                _('Show on Startup'),
                SettingType.CONFIG,
                'show_main_window_on_startup',
                props={'entries': entries},
                desc=_('Show window when starting Gajim')),
    ]


################################################################################
# Plugin Settings
################################################################################

class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
    '''
    Preferences window of the plugin.

    The layout is loaded from ``config_dialog.ui`` next to this file; a
    ``SideBarSwitcher`` is attached to the UI's stack and one preference box
    is added to each named container found in the UI.
    '''

    def __init__(self, plugin: stt_voice_messages.STTVoiceMessagesPlugin,
                 parent: Gtk.Window) -> None:
        '''
        :param plugin: plugin instance whose ``config`` is written by
            ``_on_setting``
        :param parent: window this dialog is opened from
        '''
        Gtk.ApplicationWindow.__init__(self)
        self.plugin = plugin

        self.set_application(app.app)
        # The dialog this window replaces handed ``parent`` on to its base
        # dialog; keep the window tied to its opener instead of silently
        # ignoring the argument.
        if parent is not None:
            self.set_transient_for(parent)
        self.set_position(Gtk.WindowPosition.CENTER)
        self.set_show_menubar(False)
        self.set_name('PreferencesWindow')
        self.set_default_size(900, 650)
        self.set_resizable(True)
        self.set_title(_('Preferences'))

        ui_path = Path(__file__).parent
        self._ui = get_builder(str(ui_path.resolve() / 'config_dialog.ui'))

        # UI container name -> preference box added to it
        self._prefs: dict[str, PreferenceBox] = {}
        side_bar_switcher = SideBarSwitcher()
        side_bar_switcher.set_stack(self._ui.stack)
        self._ui.grid.attach(side_bar_switcher, 0, 0, 1, 1)
        self.add(self._ui.grid)

        prefs: list[tuple[str, type[PreferenceBox]]] = [
            ('stt_behaviour', STTBehaviour),
            ('models', Models),
            ('file_preview', FilePreview),
            ('whisper_general', openai_whisper.OpenAIWhisperGeneral),
        ]

        self._add_prefs(prefs)
        self.show_all()

    def _add_prefs(self,
                   prefs: list[tuple[str, type[PreferenceBox]]]) -> None:
        '''
        Instantiate every preference box and add it to its UI container.

        Entries whose container lookup yields ``None`` are skipped.
        '''
        for ui_name, klass in prefs:
            pref_box = getattr(self._ui, ui_name)
            # Was a bare print(); route the diagnostic through the logger so
            # it does not end up on stdout.
            log.debug('pref_box = %s', pref_box)
            if pref_box is None:
                continue
            pref = klass(self)  # pyright: ignore
            pref_box.add(pref)
            self._prefs[ui_name] = pref

    def _on_setting(self, value: Any, data: Any) -> None:
        '''Store ``value`` in the plugin config under the key ``data``.'''
        self.plugin.config[data] = value


################################################################################
# Preference boxes
################################################################################

class PreferenceBox(SettingsBox):
    '''``SettingsBox`` pre-populated with ``settings`` and styled as a
    bordered, non-selectable list.'''

    def __init__(self, settings: list[Setting]) -> None:
        SettingsBox.__init__(self, None)
        self.get_style_context().add_class('border')
        self.set_selection_mode(Gtk.SelectionMode.NONE)
        self.set_vexpand(False)
        self.set_valign(Gtk.Align.END)

        for setting in settings:
            self.add_setting(setting)
        self.update_states()


class STTBehaviour(PreferenceBox):
    '''Box for the 'stt_behaviour' container (placeholder content).'''

    def __init__(self, *args: Any) -> None:
        # ``args`` (the owning window) is accepted but not used.
        PreferenceBox.__init__(self, _placeholder_settings())


class Models(PreferenceBox):
    '''Box for the 'models' container (placeholder content).'''

    def __init__(self, *args: Any) -> None:
        # ``args`` (the owning window) is accepted but not used.
        PreferenceBox.__init__(self, _placeholder_settings())


class FilePreview(PreferenceBox):
    '''Box for the 'file_preview' container (placeholder content).'''

    def __init__(self, *args: Any) -> None:
        # ``args`` (the owning window) is accepted but not used.
        PreferenceBox.__init__(self, _placeholder_settings())
#
# Gajim is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
from pathlib import Path

from gi.repository import Gtk

from gajim.plugins.gajimplugin import GajimPluginConfig
from gajim.plugins.plugins_i18n import _

from .. import helper


class STTBox(Gtk.Box):
    '''
    Vertical box holding a "Transcribe" button and a label that shows the
    transcription text.
    '''

    def __init__(self,
                 preview_audio_widget: Gtk.Box,
                 config: GajimPluginConfig,
                 audio_file: Path,
                 ) -> None:
        '''
        :param preview_audio_widget: audio preview widget this box belongs to
        :param config: plugin configuration
        :param audio_file: audio file to be transcribed
        '''
        super().__init__(orientation=Gtk.Orientation.VERTICAL, spacing=12)

        self._preview_audio = preview_audio_widget
        self._config = config
        self._audio_file = audio_file
        # No speech-to-text backend is attached yet.
        self._model = None
        # Text rendered by _show_result().
        self._text = ''

        button = Gtk.Button(label=_('Transcribe'))
        button.connect('clicked', self._on_transcribe_clicked)
        self._transcribe_button = button

        label = Gtk.Label(label=_('Nothing transcribed yet'))
        label.set_max_width_chars(40)
        label.set_line_wrap(True)
        self._transcription_label = label

        # Button first, transcript below it.
        for child in (button, label):
            self.add(child)

        self.show_all()

    def _on_transcribe_clicked(self, _button: Gtk.Button):
        '''Click handler of the "Transcribe" button; currently a no-op.'''
        # TODO: run the transcription through helper.BackgroundTask with
        # self._show_result as finish callback. The disabled draft passed
        # ``self._model.transcribe()`` (the call result) as the task
        # function - presumably the callable itself is meant; confirm when
        # this gets wired up.
        pass

    def _show_result(self):
        '''Display the stripped transcript, or a hint if it is empty.'''
        transcript = self._text.strip()
        if transcript == '':
            transcript = _('_Have not heard any word!_')
        self._transcription_label.set_text(transcript)
#
# Gajim is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Gajim is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.

from gi.repository import Gio, GObject

# Background reading / origin of this helper:
# https://discourse.gnome.org/t/gtk-threading-problem-with-glib-idle-add/13597/5
# https://github.com/gdm-settings/gdm-settings/blob/f245d3000200fa6be2a35c7f6ac45b131dadb5d6/src/utils.py#L116..L162


class AlreadyRunningError(Exception):
    '''Raised by BackgroundTask.start() while a previous run is pending.'''


class InvalidGioTaskError(Exception):
    '''Raised by BackgroundTask.finish() when there is no valid task.'''


class BackgroundTask(GObject.Object):
    '''
    Run a callable in a worker thread through ``Gio.Task``.

    ``function`` is called without arguments in the thread;
    ``finish_callback`` is called without arguments once the task is done.
    Call ``finish()`` afterwards to obtain the return value of ``function``
    or to have its exception re-raised.
    '''

    __gtype_name__ = 'BackgroundTask'

    def __init__(self, function, finish_callback, **kwargs):
        '''
        :param function: zero-argument callable executed in the thread
        :param finish_callback: zero-argument callable run on completion
        :param kwargs: forwarded to ``GObject.Object``
        '''
        super().__init__(**kwargs)

        self.function = function
        self.finish_callback = finish_callback
        # Gio.Task of the pending run; only reset by finish().
        self._current = None

    def start(self):
        '''
        Start ``function`` in a worker thread.

        :raises AlreadyRunningError: if a previous run has not been
            collected with ``finish()`` yet
        '''
        if self._current:
            # The exception used to be instantiated without ``raise`` (and
            # its class was never defined), so this guard could only fail
            # with a NameError.
            raise AlreadyRunningError('Task is already running')

        def on_ready(source, _task, _user_data):
            # ``source`` is this BackgroundTask (the task's source object).
            source.finish_callback()

        task = Gio.Task.new(self, None, on_ready, None)
        task.run_in_thread(self._thread_cb)

        self._current = task

    @staticmethod
    def _thread_cb(task, self, task_data, cancellable):
        '''
        Thread function handed to ``Gio.Task.run_in_thread``.

        Declared static because Gio supplies the arguments itself; the
        second one is the task's source object, i.e. the BackgroundTask,
        hence the name ``self``.
        '''
        try:
            retval = self.function()
            task.return_value(retval)
        except Exception as e:
            # Hand the exception over as the task value; finish() re-raises
            # it in the caller's thread.
            task.return_value(e)

    def finish(self):
        '''
        Return the result of the last run and mark the task as collected.

        :raises InvalidGioTaskError: if there is no valid task for this
            object
        :raises Exception: whatever ``function`` raised in the thread
        '''
        task = self._current
        self._current = None

        if not Gio.Task.is_valid(task, self):
            raise InvalidGioTaskError()

        value = task.propagate_value().value

        if isinstance(value, Exception):
            raise value

        return value
# This file is part of Gajim.
#
# Gajim is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Gajim is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.

import typing
from pathlib import Path
from typing import Any

from gajim.gtk.const import Setting, SettingKind, SettingType
from gajim.gtk.preferences import PreferenceBox
from gajim.plugins.plugins_i18n import _

try:
    import whisper
except ModuleNotFoundError:
    if typing.TYPE_CHECKING:
        import whisper
    # The flag used to be assigned only on success, so reading it without
    # whisper installed raised a NameError.
    OPENAI_WHISPER_AVAILABLE = False
else:
    OPENAI_WHISPER_AVAILABLE = True

# Single source for the selectable model sizes (was duplicated in two places
# with a different order).
WHISPER_MODEL_SIZES = ('tiny', 'base', 'small', 'medium', 'large')


class WhisperModel:
    '''Speech-to-text backend based on the ``whisper`` module.'''

    def __init__(self):
        self._model_sizes = list(WHISPER_MODEL_SIZES)
        self._multilanguage = True

        self._config = {
            'model_size': 'tiny'
        }

    def transcribe(self, audio_file: Path) -> str:
        '''
        Transcribe ``audio_file`` and return the recognized text.

        The model named by ``self._config['model_size']`` is loaded on every
        call.

        :raises RuntimeError: if the ``whisper`` module is not installed
        '''
        if not OPENAI_WHISPER_AVAILABLE:
            raise RuntimeError('The "whisper" module is not installed')

        model = whisper.load_model(self._config['model_size'])
        # whisper only loads audio from ``str`` paths; a ``Path`` object is
        # treated as raw audio data and fails, so convert explicitly.
        result = model.transcribe(str(audio_file))
        return result["text"]

    def _build_config(self) -> list[Setting]:
        '''Return the settings describing this model's options.'''
        settings = [
            Setting(SettingKind.COMBO,
                    _('Language Model'),
                    SettingType.VALUE,
                    callback=self._on_setting,
                    props={'combo_items': WHISPER_MODEL_SIZES}),

            Setting(SettingKind.SWITCH,
                    _('Use Multilanguage Model'),
                    SettingType.VALUE,
                    self._multilanguage,
                    callback=self._on_setting,
                    data='use_multilanguage_model'),
        ]

        return settings

    def _on_setting(self, value: Any, data: Any = None) -> None:
        '''
        Callback of the settings built in ``_build_config``; no-op for now.

        Takes ``(value, data)`` like the other setting callbacks of this
        plugin; the former one-argument signature could not be called that
        way. ``data`` defaults to None because the combo setting passes no
        ``data``.
        '''
        pass


################################################################################
# Whisper Settings UI
################################################################################

class OpenAIWhisperGeneral(PreferenceBox):
    '''Preference box offering the whisper model size.'''

    def __init__(self, *args: Any) -> None:
        # ``args`` (the owning window) is accepted but not used.

        # Keys are whisper model names. The 'base' entry used to be keyed
        # 'basic', which does not match the 'base' size used everywhere else
        # in this module; the translated label is left unchanged.
        model_sizes = {
            'tiny': _('Tiny'),
            'small': _('Small'),
            'base': _('Basic'),
            'medium': _('Medium'),
            'large': _('Large'),
        }

        settings = [
            Setting(SettingKind.POPOVER,
                    _('Language Model'),
                    SettingType.VALUE,
                    callback=None,
                    props={'entries': model_sizes},
                    desc=_('Model Size')),
        ]

        PreferenceBox.__init__(self, settings)
STTVoiceMessagesPlugin(GajimPlugin): def init(self) -> None: self.description = _('Transcribes voice messages to text.') - self.config_dialog = partial(STTVoiceMessagesConfigDialog, self) + self.config_dialog = partial(config_dialog.STTVoiceMessagesConfigDialog, self) self.gui_extension_points = { 'preview_audio': (self._preview_audio_created, None), } - self.config_default_values = { - 'use_multilanguage_model': (True, ''), - 'model_size': ('base', '') - } - self._audio_file = None self._preview_audio_widget = None self._stt_box = None @@ -53,105 +48,13 @@ class STTVoiceMessagesPlugin(GajimPlugin): ) -> None: self._preview_audio_widget = preview_audio_widget self._audio_file = audio_file.as_posix() - self._create_stt_box() + #self._create_stt_box() - def _create_stt_box(self) -> None: - assert self._preview_audio_widget is not None - self._stt_box = STTBox(self._preview_audio_widget, - self.config, - self._audio_file) - self._preview_audio_widget.pack_end(self._stt_box, False, False, 0) - -class STTBox(Gtk.Box): - def __init__(self, - preview_audio_widget: Gtk.Box, - config: GajimPluginConfig, - audio_file: Path, - ) -> None: - - Gtk.Box.__init__(self, orientation=Gtk.Orientation.VERTICAL, spacing=12) - - self._config = config - self._preview_audio = preview_audio_widget - self._audio_file = audio_file - self._text = '' - - self._transcribe_button = Gtk.Button(label=_('Transcribe')) - - self._transcription_label = Gtk.Label(label=_('Nothing transcribed yet')) - self._transcription_label.set_max_width_chars(40) - self._transcription_label.set_line_wrap(True) - - self.add(self._transcribe_button) - self.add(self._transcription_label) - - self._transcribe_button.connect('clicked', self._on_transcribe_clicked) - - self.show_all() - - def _on_transcribe_clicked(self, _button: Gtk.Button): - transcription_task = BackgroundTask( - self._trascribe_by_whisper, - self._show_result - ) - transcription_task.start() - - def _show_result(self): - if self._text.strip() != '': - 
self._transcription_label.set_text(self._text.strip()) - else: - self._transcription_label.set_text(_('_Have not heard any word!_')) - - def _trascribe_by_whisper(self) -> str: - model = whisper.load_model(self._config['model_size']) - result = model.transcribe(self._audio_file) - self._text = result["text"] - return text + #def _create_stt_box(self) -> None: + # assert self._preview_audio_widget is not None + # self._stt_box = sttbox.STTBox(self._preview_audio_widget, + # self.config, + # self._audio_file) + # self._preview_audio_widget.pack_end(self._stt_box, False, False, 0) -''' -https://discourse.gnome.org/t/gtk-threading-problem-with-glib-idle-add/13597/5 -https://github.com/gdm-settings/gdm-settings/blob/f245d3000200fa6be2a35c7f6ac45b131dadb5d6/src/utils.py#L116..L162 -''' -class BackgroundTask (GObject.Object): - __gtype_name__ = 'BackgroundTask' - - def __init__ (self, function, finish_callback, **kwargs): - super().__init__(**kwargs) - - self.function = function - self.finish_callback = finish_callback - self._current = None - - def start(self): - if self._current: - AlreadyRunningError('Task is already running') - - finish_callback = lambda self, task, nothing: self.finish_callback() - - task = Gio.Task.new(self, None, finish_callback, None) - task.run_in_thread(self._thread_cb) - - self._current = task - - @staticmethod - def _thread_cb (task, self, task_data, cancellable): - try: - retval = self.function() - task.return_value(retval) - except Exception as e: - task.return_value(e) - - def finish (self): - task = self._current - self._current = None - - if not Gio.Task.is_valid(task, self): - raise InvalidGioTaskError() - - value = task.propagate_value().value - - if isinstance(value, Exception): - raise value - - return value