Some more work on multi models

2024-06-14 19:53:26 +02:00
parent d510b74d3d
commit 2ac06ad8a5
6 changed files with 112 additions and 84 deletions
@@ -0,0 +1,21 @@
+from dataclasses import dataclass, field
+
+from whisper import available_models
+
+from gajim.common.app import Any
+
+from .models.model import Model
+
+
+@dataclass
+class PluginConfig:
+    """Default configuration values for the plugin, grouped by section.
+
+    Every field uses a ``default_factory`` so each instance gets its own
+    dict instead of sharing one mutable default.
+    """
+
+    # Plugin-wide settings; both entries default to None.
+    general: dict[str, Any] = field(default_factory=lambda: {
+        'model': None,
+        'auto_transcribe': None,
+    })
+
+    # Settings for the OpenAI Whisper model.
+    openaiwhisper: dict[str, Any] = field(default_factory=lambda: {
+        'model_size': 'tiny',
+        'multilingual_model': True
+    })
+
@@ -20,6 +20,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Any

 from gi.repository import Gtk
+import whisper

 from gajim.common import app
 from gajim.gtk.builder import get_builder
@@ -29,17 +30,19 @@ from gajim.gtk.sidebar_switcher import SideBarSwitcher
 from gajim.plugins.helpers import get_builder
 from gajim.plugins.plugins_i18n import _

-from .. import stt_voice_messages
 from ..models import openai_whisper
+from ..configs import *

 if TYPE_CHECKING:
    from .. import stt_voice_messages

 log = logging.getLogger('gajim.p.stt_voice_messages_config')

+
 ################################################################################
 # Helper
 ################################################################################
+
 def check_module(module: str) -> bool:
    try:
        __import__(module)
@@ -68,7 +71,7 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
        self.set_name('PreferencesWindow')
        self.set_default_size(900, 650)
        self.set_resizable(True)
-        self.set_title(_('Preferences'))
+        self.set_title(_('STT Voice Messages - Preferences'))

        ui_path = Path(__file__).parent
        self._ui = get_builder(str(ui_path.resolve() / 'config_dialog.ui'))
@@ -83,7 +86,7 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
            ('stt_behaviour', STTBehaviour),
            ('models', Models),
            ('file_preview', FilePreview),
-            ('whisper_general', openai_whisper.OpenAIWhisperGeneral),
+            ('whisper_general', OpenAIWhisperGeneral),
        ]

        self._add_prefs(prefs)
@@ -92,21 +95,18 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
    def _add_prefs(self, prefs: list[tuple[str, type[PreferenceBox]]]):
+        """Instantiate each preference class and add it to its UI container.
+
+        ``prefs`` pairs the name of a container on ``self._ui`` with the
+        class to instantiate; created boxes are kept in ``self._prefs``.
+        """
        for ui_name, klass in prefs:
            pref_box = getattr(self._ui, ui_name)
-            print('pref_box = ', pref_box)
-            if pref_box is None:
-                continue
            pref = klass(self)  # pyright: ignore
+            log.debug('pref = %s', pref)
            pref_box.add(pref)
            self._prefs[ui_name] = pref

    def _on_setting(self, value: Any, data: Any) -> None:
+        """Store a changed setting in the plugin config and update the plugin.
+
+        String values are stripped of surrounding whitespace before they
+        are stored under the key ``data``.
+        """
+        if isinstance(value, str):
+            # str.strip() returns a new string; rebind it, otherwise the
+            # unstripped value would be stored.
+            value = value.strip()
        self.plugin.config[data] = value
+        self.plugin.update()


-################################################################################
-# Preference boxes
-################################################################################
-
 class PreferenceBox(SettingsBox):
    def __init__(self, settings: list[Setting]) -> None:
        SettingsBox.__init__(self, None)
@@ -120,9 +120,12 @@ class PreferenceBox(SettingsBox):
        self.update_states()


+################################################################################
+# General Preferences
+################################################################################
+
 class STTBehaviour(PreferenceBox):
    def __init__(self, *args: Any) -> None:
-
        main_window_on_startup_items = {
            'always': _('Always'),
            'never': _('Never'),
@@ -143,7 +146,6 @@ class STTBehaviour(PreferenceBox):

 class Models(PreferenceBox):
    def __init__(self, *args: Any) -> None:
-
        main_window_on_startup_items = {
            'always': _('Always'),
            'never': _('Never'),
@@ -164,7 +166,6 @@ class Models(PreferenceBox):

 class FilePreview(PreferenceBox):
    def __init__(self, *args: Any) -> None:
-
        main_window_on_startup_items = {
            'always': _('Always'),
            'never': _('Never'),
@@ -183,3 +184,27 @@ class FilePreview(PreferenceBox):
        PreferenceBox.__init__(self, settings)


+################################################################################
+# Whisper Settings UI
+################################################################################
+
+class OpenAIWhisperGeneral(PreferenceBox):
+    """Preference box holding the OpenAI Whisper specific settings.
+
+    Currently offers a single popover for choosing the model size.
+    """
+
+    def __init__(self, *args: Any) -> None:
+        # NOTE(review): this is a freshly constructed PluginConfig, so the
+        # value shown is the dataclass default and changes made here live
+        # only on this instance -- confirm whether the plugin's own config
+        # should be used instead.
+        self.config = PluginConfig().openaiwhisper
+
+        settings = [
+            Setting(SettingKind.POPOVER,
+                    _('Language Model Size'),
+                    SettingType.VALUE,
+                    value=str(self.config['model_size']),
+                    data='model_size',
+                    callback=self._on_setting,
+                    # Offer every model name reported by the whisper package.
+                    props={'entries': whisper.available_models()}),
+        ]
+
+        PreferenceBox.__init__(self, settings)
+
+    def _on_setting(self, value: Any, data: Any) -> None:
+        """Store ``value`` under the key ``data`` in this box's config dict."""
+        log.debug('before: %s', self.config)
+        self.config[data] = value
+        log.debug('after: %s', self.config)
@@ -12,21 +12,18 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with Gajim. If not, see <http://www.gnu.org/licenses/>.
-from pathlib import Path

 from gi.repository import Gtk

 from gajim.plugins.gajimplugin import GajimPluginConfig
 from gajim.plugins.plugins_i18n import _

-from .. import helper
-

 class STTBox(Gtk.Box):
    def __init__(self,
                 preview_audio_widget: Gtk.Box,
                 config: GajimPluginConfig,
-                 audio_file: Path,
+                 audio_file: str,
                 ) -> None:

        Gtk.Box.__init__(self, orientation=Gtk.Orientation.VERTICAL, spacing=12)
@@ -39,7 +36,8 @@ class STTBox(Gtk.Box):

        self._transcribe_button = Gtk.Button(label=_('Transcribe'))

-        self._transcription_label = Gtk.Label(label=_('Nothing transcribed yet'))
+        self._transcription_label = Gtk.Label(
+            label=_('Nothing transcribed yet'))
        self._transcription_label.set_max_width_chars(40)
        self._transcription_label.set_line_wrap(True)

@@ -0,0 +1,30 @@
+# This file is part of Gajim.
+#
+# Gajim is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Gajim is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
+
+from abc import ABC, abstractmethod
+from pathlib import Path
+
+from gajim.gtk.const import Setting
+
+
+class Model(ABC):
+    """Abstract interface that every transcription model must implement."""
+
+    @abstractmethod
+    def transcribe(self, audio_file: Path) -> str:
+        """Return the transcription of ``audio_file`` as text."""
+        return ''
+
+    @abstractmethod
+    def on_setting(self, setting: Setting):
+        """Hook that receives a ``Setting``; subclasses must implement it."""
+        pass
@@ -15,11 +15,10 @@

 import typing
 from pathlib import Path
-from typing import Any

-from gajim.gtk.const import Setting, SettingKind, SettingType
-from gajim.gtk.preferences import PreferenceBox
-from gajim.plugins.plugins_i18n import _
+from .model import Model
+
+from gajim.gtk.const import Setting

 try:
    import whisper
@@ -29,7 +28,7 @@ except ModuleNotFoundError:
        import whisper


-class WhisperModel:
+class WhisperModel(Model):
    def __init__(self):
        self._model_sizes = ['tiny', 'small', 'base', 'medium', 'large']
        self._multilanguage = True
@@ -43,50 +42,6 @@ class WhisperModel:
        result = model.transcribe(audio_file)
        return result["text"]

-    def _build_config(self) -> list[Setting]:
-        whisper_model_sizes = ('tiny', 'base', 'small', 'medium', 'large')
-        settings = [
-            Setting(SettingKind.COMBO,
-                    _('Language Model'),
-                    SettingType.VALUE,
-                    callback=self._on_setting,
-                    props={'combo_items': whisper_model_sizes}),
-
-            Setting(SettingKind.SWITCH,
-                    _('Use Multilanguage Model'),
-                    SettingType.VALUE,
-                    self._multilanguage,
-                    callback=self._on_setting,
-                    data='use_multilanguage_model'),
-        ]
-
-        return settings
-
-    def _on_setting(self, setting: Setting):
+    def on_setting(self, setting: Setting):
        pass

-################################################################################
-# Whisper Settings UI
-################################################################################
-
-class OpenAIWhisperGeneral(PreferenceBox):
-    def __init__(self, *args: Any) -> None:
-
-        model_sizes = {
-            'tiny': _('Tiny'),
-            'small': _('Small'),
-            'basic': _('Basic'),
-            'medium': _('Medium'),
-            'large': _('Large'),
-        }
-
-        settings = [
-            Setting(SettingKind.POPOVER,
-                    _('Language Model'),
-                    SettingType.VALUE,
-                    callback=None,
-                    props={'entries': model_sizes},
-                    desc=_('Model Size')),
-        ]
-
-        PreferenceBox.__init__(self, settings)
@@ -32,29 +32,28 @@ log = logging.getLogger('gajim.p.stt_voice_messages')
 class STTVoiceMessagesPlugin(GajimPlugin):
+    """Gajim plugin that transcribes voice messages to text."""
+
    def init(self) -> None:
+        """Set up description, config dialog and GUI extension points."""
        self.description = _('Transcribes voice messages to text.')
-        self.config_dialog = partial(config_dialog.STTVoiceMessagesConfigDialog, self)
+        self.config_dialog = partial(config_dialog.STTVoiceMessagesConfigDialog,
+                                     self)

        self.gui_extension_points = {
-            'preview_audio': (self._preview_audio_created, None),
+            'preview_audio': (self._on_preview_audio_created, None),
        }

-        self._audio_file = None
+        self._audio_file: str = ''
        self._preview_audio_widget = None
        self._stt_box = None

-    def _preview_audio_created(self,
-                           preview_audio_widget: Gtk.Box,
-                           audio_file: Path
-                           ) -> None:
+    def _on_preview_audio_created(self,
+                                  preview_audio_widget: Gtk.Box,
+                                  audio_file: Path
+                                  ) -> None:
+        """Store the preview widget and file path, then add the STT box."""
        self._preview_audio_widget = preview_audio_widget
        self._audio_file = audio_file.as_posix()
-        #self._create_stt_box()
-
-    #def _create_stt_box(self) -> None:
-    #    assert self._preview_audio_widget is not None
-    #    self._stt_box = sttbox.STTBox(self._preview_audio_widget,
-    #                            self.config,
-    #                            self._audio_file)
-    #    self._preview_audio_widget.pack_end(self._stt_box, False, False, 0)
-
+        self._create_stt_box()

+    def _create_stt_box(self) -> None:
+        """Create an STTBox and pack it at the end of the preview widget."""
+        assert self._preview_audio_widget is not None
+        self._stt_box = sttbox.STTBox(self._preview_audio_widget,
+                                      self.config,
+                                      self._audio_file)
+        self._preview_audio_widget.pack_end(self._stt_box, False, False, 0)