From 2ac06ad8a5f9a8ac174d14f356eb5f597b1312db Mon Sep 17 00:00:00 2001 From: mesonium Date: Fri, 14 Jun 2024 19:53:26 +0200 Subject: [PATCH] Some more work on multi models --- stt_voice_messages/configs.py | 21 ++++++++ stt_voice_messages/gtk/config_dialog.py | 51 ++++++++++++++----- stt_voice_messages/gtk/sttbox.py | 8 ++- stt_voice_messages/models/model.py | 30 +++++++++++ stt_voice_messages/models/openai_whisper.py | 55 ++------------------- stt_voice_messages/stt_voice_messages.py | 31 ++++++------ 6 files changed, 112 insertions(+), 84 deletions(-) create mode 100644 stt_voice_messages/configs.py create mode 100644 stt_voice_messages/models/model.py diff --git a/stt_voice_messages/configs.py b/stt_voice_messages/configs.py new file mode 100644 index 0000000..831f2ff --- /dev/null +++ b/stt_voice_messages/configs.py @@ -0,0 +1,21 @@ +from dataclasses import dataclass, field + +from whisper import available_models + +from gajim.common.app import Any + +from .models.model import Model + + +@dataclass +class PluginConfig: + general: dict[str, Any] = field(default_factory=lambda: { + 'model': None, + 'auto_transcribe': None, + }) + + openaiwhisper: dict[str, Any] = field(default_factory=lambda: { + 'model_size': 'tiny', + 'multilingual_model': True + }) + diff --git a/stt_voice_messages/gtk/config_dialog.py b/stt_voice_messages/gtk/config_dialog.py index 5164f21..dbc2a91 100644 --- a/stt_voice_messages/gtk/config_dialog.py +++ b/stt_voice_messages/gtk/config_dialog.py @@ -20,6 +20,7 @@ from pathlib import Path from typing import TYPE_CHECKING, Any from gi.repository import Gtk +import whisper from gajim.common import app from gajim.gtk.builder import get_builder @@ -29,17 +30,19 @@ from gajim.gtk.sidebar_switcher import SideBarSwitcher from gajim.plugins.helpers import get_builder from gajim.plugins.plugins_i18n import _ -from .. import stt_voice_messages from ..models import openai_whisper +from ..configs import * if TYPE_CHECKING: from .. import stt_voice_messages log = logging.getLogger('gajim.p.stt_voice_messages_config') + ################################################################################ # Helper ################################################################################ + def check_module(module: str) -> bool: try: __import__(module) @@ -68,7 +71,7 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow): self.set_name('PreferencesWindow') self.set_default_size(900, 650) self.set_resizable(True) - self.set_title(_('Preferences')) + self.set_title(_('STT Voice Messages - Preferences')) ui_path = Path(__file__).parent self._ui = get_builder(str(ui_path.resolve() / 'config_dialog.ui')) @@ -83,7 +86,7 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow): ('stt_behaviour', STTBehaviour), ('models', Models), ('file_preview', FilePreview), - ('whisper_general', openai_whisper.OpenAIWhisperGeneral), + ('whisper_general', OpenAIWhisperGeneral), ] self._add_prefs(prefs) @@ -92,21 +95,18 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow): def _add_prefs(self, prefs: list[tuple[str, type[PreferenceBox]]]): for ui_name, klass in prefs: pref_box = getattr(self._ui, ui_name) - print('pref_box = ', pref_box) - if pref_box is None: - continue pref = klass(self) # pyright: ignore + print("pref = ", pref) pref_box.add(pref) self._prefs[ui_name] = pref def _on_setting(self, value: Any, data: Any) -> None: + if isinstance(value, str): + value.strip() self.plugin.config[data] = value + self.plugin.update() -################################################################################ -# Preference boxes -################################################################################ - class PreferenceBox(SettingsBox): def __init__(self, settings: list[Setting]) -> None: SettingsBox.__init__(self, None) @@ -120,9 +120,12 @@ class PreferenceBox(SettingsBox): self.update_states() +################################################################################ +# General Preferences +################################################################################ + class STTBehaviour(PreferenceBox): def __init__(self, *args: Any) -> None: - main_window_on_startup_items = { 'always': _('Always'), 'never': _('Never'), @@ -143,7 +146,6 @@ class STTBehaviour(PreferenceBox): class Models(PreferenceBox): def __init__(self, *args: Any) -> None: - main_window_on_startup_items = { 'always': _('Always'), 'never': _('Never'), @@ -164,7 +166,6 @@ class Models(PreferenceBox): class FilePreview(PreferenceBox): def __init__(self, *args: Any) -> None: - main_window_on_startup_items = { 'always': _('Always'), 'never': _('Never'), @@ -183,3 +184,27 @@ class FilePreview(PreferenceBox): PreferenceBox.__init__(self, settings) +################################################################################ +# Whisper Settings UI +################################################################################ + +class OpenAIWhisperGeneral(PreferenceBox): + def __init__(self, *args: Any) -> None: + self.config = PluginConfig().openaiwhisper + + settings = [ + Setting(SettingKind.POPOVER, + _('Language Model Size'), + SettingType.VALUE, + value=str(self.config['model_size']), + data='model_size', + callback=self._on_setting, + props={'entries': whisper.available_models()}), + ] + + PreferenceBox.__init__(self, settings) + + def _on_setting(self, value: Any, data: Any) -> None: + print("before: ", self.config) + self.config[data] = value + print("after: ", self.config) diff --git a/stt_voice_messages/gtk/sttbox.py b/stt_voice_messages/gtk/sttbox.py index 383ed41..225c324 100644 --- a/stt_voice_messages/gtk/sttbox.py +++ b/stt_voice_messages/gtk/sttbox.py @@ -12,21 +12,18 @@ # # You should have received a copy of the GNU General Public License # along with Gajim. If not, see . -from pathlib import Path from gi.repository import Gtk from gajim.plugins.gajimplugin import GajimPluginConfig from gajim.plugins.plugins_i18n import _ -from .. import helper - class STTBox(Gtk.Box): def __init__(self, preview_audio_widget: Gtk.Box, config: GajimPluginConfig, - audio_file: Path, + audio_file: str, ) -> None: Gtk.Box.__init__(self, orientation=Gtk.Orientation.VERTICAL, spacing=12) @@ -39,7 +36,8 @@ class STTBox(Gtk.Box): self._transcribe_button = Gtk.Button(label=_('Transcribe')) - self._transcription_label = Gtk.Label(label=_('Nothing transcribed yet')) + self._transcription_label = Gtk.Label( + label=_('Nothing transcribed yet')) self._transcription_label.set_max_width_chars(40) self._transcription_label.set_line_wrap(True) diff --git a/stt_voice_messages/models/model.py b/stt_voice_messages/models/model.py new file mode 100644 index 0000000..6d8d7e1 --- /dev/null +++ b/stt_voice_messages/models/model.py @@ -0,0 +1,30 @@ +# This file is part of Gajim. +# +# Gajim is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Gajim is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Gajim. If not, see . + +from abc import ABC, abstractmethod +from pathlib import Path + +from gajim.gtk.const import Setting + + +class Model(ABC): + + @abstractmethod + def transcribe(self, audio_file: Path) -> str: + return '' + + @abstractmethod + def on_setting(self, setting: Setting): + pass \ No newline at end of file diff --git a/stt_voice_messages/models/openai_whisper.py b/stt_voice_messages/models/openai_whisper.py index f63df60..70cecb6 100644 --- a/stt_voice_messages/models/openai_whisper.py +++ b/stt_voice_messages/models/openai_whisper.py @@ -15,11 +15,10 @@ import typing from pathlib import Path -from typing import Any -from gajim.gtk.const import Setting, SettingKind, SettingType -from gajim.gtk.preferences import PreferenceBox -from gajim.plugins.plugins_i18n import _ +from .model import Model + +from gajim.gtk.const import Setting try: import whisper @@ -29,7 +28,7 @@ except ModuleNotFoundError: import whisper -class WhisperModel: +class WhisperModel(Model): def __init__(self): self._model_sizes = ['tiny', 'small', 'base', 'medium', 'large'] self._multilanguage = True @@ -43,50 +42,6 @@ class WhisperModel: result = model.transcribe(audio_file) return result["text"] - def _build_config(self) -> list[Setting]: - whisper_model_sizes = ('tiny', 'base', 'small', 'medium', 'large') - settings = [ - Setting(SettingKind.COMBO, - _('Language Model'), - SettingType.VALUE, - callback=self._on_setting, - props={'combo_items': whisper_model_sizes}), - - Setting(SettingKind.SWITCH, - _('Use Multilanguage Model'), - SettingType.VALUE, - self._multilanguage, - callback=self._on_setting, - data='use_multilanguage_model'), - ] - - return settings - - def _on_setting(self, setting: Setting): + def on_setting(self, setting: Setting): pass -################################################################################ -# Whisper Settings UI -################################################################################ - -class OpenAIWhisperGeneral(PreferenceBox): - def __init__(self, *args: Any) -> None: - - model_sizes = { - 'tiny': _('Tiny'), - 'small': _('Small'), - 'basic': _('Basic'), - 'medium': _('Medium'), - 'large': _('Large'), - } - - settings = [ - Setting(SettingKind.POPOVER, - _('Language Model'), - SettingType.VALUE, - callback=None, - props={'entries': model_sizes}, - desc=_('Model Size')), - ] - - PreferenceBox.__init__(self, settings) diff --git a/stt_voice_messages/stt_voice_messages.py b/stt_voice_messages/stt_voice_messages.py index e197387..b4dbdf7 100644 --- a/stt_voice_messages/stt_voice_messages.py +++ b/stt_voice_messages/stt_voice_messages.py @@ -32,29 +32,28 @@ log = logging.getLogger('gajim.p.stt_voice_messages') class STTVoiceMessagesPlugin(GajimPlugin): def init(self) -> None: self.description = _('Transcribes voice messages to text.') - self.config_dialog = partial(config_dialog.STTVoiceMessagesConfigDialog, self) + self.config_dialog = partial(config_dialog.STTVoiceMessagesConfigDialog, + self) self.gui_extension_points = { - 'preview_audio': (self._preview_audio_created, None), + 'preview_audio': (self._on_preview_audio_created, None), } - self._audio_file = None + self._audio_file: str = '' self._preview_audio_widget = None self._stt_box = None - def _preview_audio_created(self, - preview_audio_widget: Gtk.Box, - audio_file: Path - ) -> None: + def _on_preview_audio_created(self, + preview_audio_widget: Gtk.Box, + audio_file: Path + ) -> None: self._preview_audio_widget = preview_audio_widget self._audio_file = audio_file.as_posix() - #self._create_stt_box() - - #def _create_stt_box(self) -> None: - # assert self._preview_audio_widget is not None - # self._stt_box = sttbox.STTBox(self._preview_audio_widget, - # self.config, - # self._audio_file) - # self._preview_audio_widget.pack_end(self._stt_box, False, False, 0) - + self._create_stt_box() + def _create_stt_box(self) -> None: + assert self._preview_audio_widget is not None + self._stt_box = sttbox.STTBox(self._preview_audio_widget, + self.config, + self._audio_file) + self._preview_audio_widget.pack_end(self._stt_box, False, False, 0)