From 2ac06ad8a5f9a8ac174d14f356eb5f597b1312db Mon Sep 17 00:00:00 2001
From: mesonium <mesonium@posteo.eu>
Date: Fri, 14 Jun 2024 19:53:26 +0200
Subject: [PATCH] Some more work on multi models

---
 stt_voice_messages/configs.py               | 21 ++++++++
 stt_voice_messages/gtk/config_dialog.py     | 51 ++++++++++++++-----
 stt_voice_messages/gtk/sttbox.py            |  8 ++-
 stt_voice_messages/models/model.py          | 30 +++++++++++
 stt_voice_messages/models/openai_whisper.py | 55 ++-------------------
 stt_voice_messages/stt_voice_messages.py    | 31 ++++++------
 6 files changed, 112 insertions(+), 84 deletions(-)
 create mode 100644 stt_voice_messages/configs.py
 create mode 100644 stt_voice_messages/models/model.py

diff --git a/stt_voice_messages/configs.py b/stt_voice_messages/configs.py
new file mode 100644
index 0000000..831f2ff
--- /dev/null
+++ b/stt_voice_messages/configs.py
@@ -0,0 +1,27 @@
+from dataclasses import dataclass, field
+from typing import Any
+
+from whisper import available_models
+
+from .models.model import Model
+
+
+@dataclass
+class PluginConfig:
+    """Default plugin settings, grouped into one dict per section.
+
+    Each field uses a ``default_factory`` so every instance gets its own
+    fresh dicts and changing one instance never leaks into another.
+    """
+
+    # General settings; both values start unset (None).
+    general: dict[str, Any] = field(default_factory=lambda: {
+        'model': None,
+        'auto_transcribe': None,
+    })
+
+    # Defaults used by the OpenAI Whisper preference box.
+    openaiwhisper: dict[str, Any] = field(default_factory=lambda: {
+        'model_size': 'tiny',
+        'multilingual_model': True,
+    })
diff --git a/stt_voice_messages/gtk/config_dialog.py b/stt_voice_messages/gtk/config_dialog.py
index 5164f21..dbc2a91 100644
--- a/stt_voice_messages/gtk/config_dialog.py
+++ b/stt_voice_messages/gtk/config_dialog.py
@@ -20,6 +20,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
 from gi.repository import Gtk
+import whisper
 
 from gajim.common import app
 from gajim.gtk.builder import get_builder
@@ -29,17 +30,19 @@ from gajim.gtk.sidebar_switcher import SideBarSwitcher
 from gajim.plugins.helpers import get_builder
 from gajim.plugins.plugins_i18n import _
 
-from .. import stt_voice_messages
 from ..models import openai_whisper
+from ..configs import *
 
 if TYPE_CHECKING:
     from .. import stt_voice_messages
 
 log = logging.getLogger('gajim.p.stt_voice_messages_config')
 
+
 ################################################################################
 # Helper
 ################################################################################
+
 def check_module(module: str) -> bool:
     try:
         __import__(module)
@@ -68,7 +71,7 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
         self.set_name('PreferencesWindow')
         self.set_default_size(900, 650)
         self.set_resizable(True)
-        self.set_title(_('Preferences'))
+        self.set_title(_('STT Voice Messages - Preferences'))
 
         ui_path = Path(__file__).parent
         self._ui = get_builder(str(ui_path.resolve() / 'config_dialog.ui'))
@@ -83,7 +86,7 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
             ('stt_behaviour', STTBehaviour),
             ('models', Models),
             ('file_preview', FilePreview),
-            ('whisper_general', openai_whisper.OpenAIWhisperGeneral),
+            ('whisper_general', OpenAIWhisperGeneral),
         ]
 
         self._add_prefs(prefs)
@@ -92,21 +95,18 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
     def _add_prefs(self, prefs: list[tuple[str, type[PreferenceBox]]]):
         for ui_name, klass in prefs:
             pref_box = getattr(self._ui, ui_name)
-            print('pref_box = ', pref_box)
-            if pref_box is None:
-                continue
             pref = klass(self)  # pyright: ignore
+            log.debug('Adding preference box %s: %r', ui_name, pref)
             pref_box.add(pref)
             self._prefs[ui_name] = pref
 
     def _on_setting(self, value: Any, data: Any) -> None:
+        if isinstance(value, str):
+            value = value.strip()  # str.strip() returns a copy; keep it
         self.plugin.config[data] = value
+        self.plugin.update()
 
 
-################################################################################
-# Preference boxes
-################################################################################
-
 class PreferenceBox(SettingsBox):
     def __init__(self, settings: list[Setting]) -> None:
         SettingsBox.__init__(self, None)
@@ -120,9 +120,12 @@ class PreferenceBox(SettingsBox):
         self.update_states()
 
 
+################################################################################
+# General Preferences
+################################################################################
+
 class STTBehaviour(PreferenceBox):
     def __init__(self, *args: Any) -> None:
-
         main_window_on_startup_items = {
             'always': _('Always'),
             'never': _('Never'),
@@ -143,7 +146,6 @@ class STTBehaviour(PreferenceBox):
 
 class Models(PreferenceBox):
     def __init__(self, *args: Any) -> None:
-
         main_window_on_startup_items = {
             'always': _('Always'),
             'never': _('Never'),
@@ -164,7 +166,6 @@ class Models(PreferenceBox):
 
 class FilePreview(PreferenceBox):
     def __init__(self, *args: Any) -> None:
-
         main_window_on_startup_items = {
             'always': _('Always'),
             'never': _('Never'),
@@ -183,3 +184,33 @@ class FilePreview(PreferenceBox):
         PreferenceBox.__init__(self, settings)
 
 
+################################################################################
+# Whisper Settings UI
+################################################################################
+
+class OpenAIWhisperGeneral(PreferenceBox):
+    """Preference box exposing the OpenAI Whisper model size setting."""
+
+    def __init__(self, *args: Any) -> None:
+        # NOTE(review): this starts from a fresh PluginConfig on every
+        # construction, so a chosen value only lives as long as this box -
+        # confirm whether it should be read from the plugin's stored config.
+        self.config = PluginConfig().openaiwhisper
+
+        settings = [
+            Setting(SettingKind.POPOVER,
+                    _('Language Model Size'),
+                    SettingType.VALUE,
+                    value=str(self.config['model_size']),
+                    data='model_size',
+                    callback=self._on_setting,
+                    props={'entries': whisper.available_models()}),
+        ]
+
+        PreferenceBox.__init__(self, settings)
+
+    def _on_setting(self, value: Any, data: Any) -> None:
+        """Store the selected *value* under the key *data* in the config."""
+        log.debug('Whisper setting %r: %r -> %r',
+                  data, self.config.get(data), value)
+        self.config[data] = value
diff --git a/stt_voice_messages/gtk/sttbox.py b/stt_voice_messages/gtk/sttbox.py
index 383ed41..225c324 100644
--- a/stt_voice_messages/gtk/sttbox.py
+++ b/stt_voice_messages/gtk/sttbox.py
@@ -12,21 +12,18 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with Gajim. If not, see <http://www.gnu.org/licenses/>.
-from pathlib import Path
 
 from gi.repository import Gtk
 
 from gajim.plugins.gajimplugin import GajimPluginConfig
 from gajim.plugins.plugins_i18n import _
 
-from .. import helper
-
 
 class STTBox(Gtk.Box):
     def __init__(self,
                  preview_audio_widget: Gtk.Box,
                  config: GajimPluginConfig,
-                 audio_file: Path,
+                 audio_file: str,
                  ) -> None:
 
         Gtk.Box.__init__(self, orientation=Gtk.Orientation.VERTICAL, spacing=12)
@@ -39,7 +36,8 @@ class STTBox(Gtk.Box):
 
         self._transcribe_button = Gtk.Button(label=_('Transcribe'))
 
-        self._transcription_label = Gtk.Label(label=_('Nothing transcribed yet'))
+        self._transcription_label = Gtk.Label(
+            label=_('Nothing transcribed yet'))
         self._transcription_label.set_max_width_chars(40)
         self._transcription_label.set_line_wrap(True)
 
diff --git a/stt_voice_messages/models/model.py b/stt_voice_messages/models/model.py
new file mode 100644
index 0000000..6d8d7e1
--- /dev/null
+++ b/stt_voice_messages/models/model.py
@@ -0,0 +1,32 @@
+# This file is part of Gajim.
+#
+# Gajim is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Gajim is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
+
+from abc import ABC, abstractmethod
+from pathlib import Path
+
+from gajim.gtk.const import Setting
+
+
+class Model(ABC):
+    """Abstract base class for speech-to-text model backends."""
+
+    @abstractmethod
+    def transcribe(self, audio_file: Path) -> str:
+        """Return the text transcribed from *audio_file*."""
+        return ''
+
+    @abstractmethod
+    def on_setting(self, setting: Setting) -> None:
+        """Handle *setting*; concrete models must override this."""
diff --git a/stt_voice_messages/models/openai_whisper.py b/stt_voice_messages/models/openai_whisper.py
index f63df60..70cecb6 100644
--- a/stt_voice_messages/models/openai_whisper.py
+++ b/stt_voice_messages/models/openai_whisper.py
@@ -15,11 +15,10 @@
 
 import typing
 from pathlib import Path
-from typing import Any
 
-from gajim.gtk.const import Setting, SettingKind, SettingType
-from gajim.gtk.preferences import PreferenceBox
-from gajim.plugins.plugins_i18n import _
+from .model import Model
+
+from gajim.gtk.const import Setting
 
 try:
     import whisper
@@ -29,7 +28,7 @@ except ModuleNotFoundError:
         import whisper
 
 
-class WhisperModel:
+class WhisperModel(Model):
     def __init__(self):
         self._model_sizes = ['tiny', 'small', 'base', 'medium', 'large']
         self._multilanguage = True
@@ -43,50 +42,6 @@ class WhisperModel:
         result = model.transcribe(audio_file)
         return result["text"]
 
-    def _build_config(self) -> list[Setting]:
-        whisper_model_sizes = ('tiny', 'base', 'small', 'medium', 'large')
-        settings = [
-            Setting(SettingKind.COMBO,
-                    _('Language Model'),
-                    SettingType.VALUE,
-                    callback=self._on_setting,
-                    props={'combo_items': whisper_model_sizes}),
-
-            Setting(SettingKind.SWITCH,
-                    _('Use Multilanguage Model'),
-                    SettingType.VALUE,
-                    self._multilanguage,
-                    callback=self._on_setting,
-                    data='use_multilanguage_model'),
-        ]
-
-        return settings
-
-    def _on_setting(self, setting: Setting):
+    def on_setting(self, setting: Setting):
         pass
 
-################################################################################
-# Whisper Settings UI
-################################################################################
-
-class OpenAIWhisperGeneral(PreferenceBox):
-    def __init__(self, *args: Any) -> None:
-
-        model_sizes = {
-            'tiny': _('Tiny'),
-            'small': _('Small'),
-            'basic': _('Basic'),
-            'medium': _('Medium'),
-            'large': _('Large'),
-        }
-
-        settings = [
-            Setting(SettingKind.POPOVER,
-                    _('Language Model'),
-                    SettingType.VALUE,
-                    callback=None,
-                    props={'entries': model_sizes},
-                    desc=_('Model Size')),
-        ]
-
-        PreferenceBox.__init__(self, settings)
diff --git a/stt_voice_messages/stt_voice_messages.py b/stt_voice_messages/stt_voice_messages.py
index e197387..b4dbdf7 100644
--- a/stt_voice_messages/stt_voice_messages.py
+++ b/stt_voice_messages/stt_voice_messages.py
@@ -32,29 +32,28 @@ log = logging.getLogger('gajim.p.stt_voice_messages')
 class STTVoiceMessagesPlugin(GajimPlugin):
     def init(self) -> None:
         self.description = _('Transcribes voice messages to text.')
-        self.config_dialog = partial(config_dialog.STTVoiceMessagesConfigDialog, self)
+        self.config_dialog = partial(config_dialog.STTVoiceMessagesConfigDialog,
+                                     self)
 
         self.gui_extension_points = {
-            'preview_audio': (self._preview_audio_created, None),
+            'preview_audio': (self._on_preview_audio_created, None),
         }
 
-        self._audio_file = None
+        self._audio_file: str = ''
         self._preview_audio_widget = None
         self._stt_box = None
 
-    def _preview_audio_created(self,
-                           preview_audio_widget: Gtk.Box,
-                           audio_file: Path
-                           ) -> None:
+    def _on_preview_audio_created(self,
+                                  preview_audio_widget: Gtk.Box,
+                                  audio_file: Path
+                                  ) -> None:
         self._preview_audio_widget = preview_audio_widget
         self._audio_file = audio_file.as_posix()
-        #self._create_stt_box()
-
-    #def _create_stt_box(self) -> None:
-    #    assert self._preview_audio_widget is not None
-    #    self._stt_box = sttbox.STTBox(self._preview_audio_widget,
-    #                            self.config,
-    #                            self._audio_file)
-    #    self._preview_audio_widget.pack_end(self._stt_box, False, False, 0)
-
+        self._create_stt_box()
 
+    def _create_stt_box(self) -> None:
+        assert self._preview_audio_widget is not None
+        self._stt_box = sttbox.STTBox(self._preview_audio_widget,
+                                      self.config,
+                                      self._audio_file)
+        self._preview_audio_widget.pack_end(self._stt_box, False, False, 0)