diff --git a/stt_voice_messages/gtk/config_dialog.py b/stt_voice_messages/gtk/config_dialog.py
index 4592d80..805164a 100644
--- a/stt_voice_messages/gtk/config_dialog.py
+++ b/stt_voice_messages/gtk/config_dialog.py
@@ -32,12 +32,13 @@ from gajim.gtk.sidebar_switcher import SideBarSwitcher
 from gajim.plugins.helpers import get_builder
 from gajim.plugins.plugins_i18n import _
 
+from ..model_settings import OpenAIWhisperSettings
 from ..models import openai_whisper
 
 if TYPE_CHECKING:
     from ..stt_voice_messages import STTVoiceMessagesPlugin
 
-log = logging.getLogger('gajim.p.stt_voice_messages_config_dialog')
+log = logging.getLogger('gajim.p.sttvm_config_dialog')
 
 
 SUPPORTED_MODELS: dict[str, dict[str, typing.Union[list[str], Any, str]]] = {
@@ -67,6 +68,7 @@ SUPPORTED_MODELS: dict[str, dict[str, typing.Union[list[str], Any, str]]] = {
 class Configuration:
     def __init__(self, plugin: STTVoiceMessagesPlugin):
         self._plugin = plugin
+        self._openaiwhisper_settings = OpenAIWhisperSettings()
         self._available_models: dict[
             str, dict[str, typing.Union[list[str], Any, str]]] = {}
         self.check_available_moduls()
@@ -84,14 +86,24 @@ class Configuration:
             value.strip()
         log.debug('plugin config before:\n %s', self.plugin.config.data)
         self.plugin.config[data] = value
+        # sttbox.py treats a None 'model_instance' as "no model selected",
+        # so only forward the setting once a model object exists.
+        model = self._plugin.config['model_instance']
+        if model is not None:
+            model.on_setting(data, value)
         log.debug('plugin config after:\n %s', self.plugin.config.data)
 
     def on_set_model(self, value: Any, data: Any) -> None:
         if isinstance(value, str):
             value.strip()
         log.debug('plugin config before:\n %s', self.plugin.config.data)
+
+        self._available_models[value]['model_instance'] = self._available_models[value]['class']()
+
         self.plugin.config['model_class'] = self._available_models[value][
             'class']
+        self.plugin.config['model_instance'] = self._available_models[value]['model_instance']
+
         self.on_setting(value, data)
         log.debug('plugin config after:\n %s', self.plugin.config.data)
 
diff --git a/stt_voice_messages/gtk/sttbox.py b/stt_voice_messages/gtk/sttbox.py
index 995ab3f..4280c9a 100644
--- a/stt_voice_messages/gtk/sttbox.py
+++ b/stt_voice_messages/gtk/sttbox.py
@@ -49,26 +49,29 @@ class STTBox(Gtk.Box):
         self.add(self._transcribe_button)
         self.add(self._transcription_label)
 
+        self._result = helper.Results('')
+
         self._transcribe_button.connect('clicked', self._on_transcribe_clicked)
         self.show_all()
 
     def _on_transcribe_clicked(self, _button: Gtk.Button) -> None:
         log.debug('config.data = %s', self._config.data)
 
-        model_class = self._config.data['model_class']
-        if model_class is None:
+        model = self._config.data['model_instance']
+        if model is None:
             return
 
-        self._model = model_class()
+        self._model = model
 
         transcription_task = helper.BackgroundTask(
-            self._model.transcribe(self._audio_file),
+            self._model.transcribe(self._result, self._audio_file),
             self._show_result
         )
         transcription_task.start()
 
     def _show_result(self):
-        self._text = self._model.result
+        assert self._model is not None
+        self._text = self._result.text
         if self._text.strip() != '':
             self._transcription_label.set_text(self._text.strip())
         else:
diff --git a/stt_voice_messages/helper.py b/stt_voice_messages/helper.py
index 8860201..88cc853 100644
--- a/stt_voice_messages/helper.py
+++ b/stt_voice_messages/helper.py
@@ -13,8 +13,17 @@
 # You should have received a copy of the GNU General Public License
 # along with Gajim. If not, see <http://www.gnu.org/licenses/>.
 
+from dataclasses import dataclass
+
 from gi.repository import Gio, GObject
 
+
+@dataclass
+class Results:
+    """Mutable holder a model writes its transcription text into."""
+    text: str
+
+
 '''
 https://discourse.gnome.org/t/gtk-threading-problem-with-glib-idle-add/13597/5
 https://github.com/gdm-settings/gdm-settings/blob/f245d3000200fa6be2a35c7f6ac45b131dadb5d6/src/utils.py#L116..L162
diff --git a/stt_voice_messages/model_settings.py b/stt_voice_messages/model_settings.py
new file mode 100644
index 0000000..3228888
--- /dev/null
+++ b/stt_voice_messages/model_settings.py
@@ -0,0 +1,23 @@
+# This file is part of Gajim.
+#
+# Gajim is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Gajim is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
+
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class OpenAIWhisperSettings:
+    """Settings read by the OpenAI Whisper model wrapper."""
+    whisperai_model_size: str = field(default='tiny', init=True)
diff --git a/stt_voice_messages/models/model.py b/stt_voice_messages/models/model.py
index 6d8d7e1..99e3881 100644
--- a/stt_voice_messages/models/model.py
+++ b/stt_voice_messages/models/model.py
@@ -18,11 +18,14 @@ from pathlib import Path
 
 from gajim.gtk.const import Setting
 
+from ..helper import Results
+
 
 
 class Model(ABC):
     @abstractmethod
-    def transcribe(self, audio_file: Path) -> str:
+    def transcribe(self, result: Results, audio_file: Path) -> str:
+        """Transcribe audio_file and store the text in result.text."""
         return ''
 
     @abstractmethod
diff --git a/stt_voice_messages/models/openai_whisper.py b/stt_voice_messages/models/openai_whisper.py
index e50e808..9110a7a 100644
--- a/stt_voice_messages/models/openai_whisper.py
+++ b/stt_voice_messages/models/openai_whisper.py
@@ -13,12 +13,16 @@
 # You should have received a copy of the GNU General Public License
 # along with Gajim. If not, see <http://www.gnu.org/licenses/>.
 
+import logging
 import typing
+from dataclasses import dataclass
 from pathlib import Path
 
+from ..helper import Results
+from ..model_settings import OpenAIWhisperSettings
 from .model import Model
 
-from gajim.gtk.const import Setting
+log = logging.getLogger('gajim.p.sttvm_whisper')
 
 try:
     import whisper
@@ -27,27 +31,37 @@ except ModuleNotFoundError:
     if typing.TYPE_CHECKING:
         import whisper
 
+@dataclass
+class Configuration:
+    model_size: str
 
 
 class WhisperModel(Model):
     def __init__(self):
         # TODO
-        self._model_sizes = ['tiny', 'small', 'base', 'medium', 'large']
-        self._multilanguage = True
         self._result: str = ''
-
-        self._config = {
-            'model_size': 'tiny'
-        }
+        self._config = OpenAIWhisperSettings()
 
     @property
     def result(self) -> str:
         return self._result
 
-    def transcribe(self, audio_file: Path) -> str:
-        model = whisper.load_model(self._config['model_size'])
-        result = model.transcribe(audio_file)
-        self._result = result['text']
+    def transcribe(self, result: Results, audio_file: Path) -> str:
+        """Transcribe audio_file, store the text in result.text, return it."""
+        model_size = self._config.whisperai_model_size
+        log.debug('model size is used = %s', model_size)
+        model = whisper.load_model(model_size)
+        # whisper only loads audio from str paths, not pathlib.Path objects.
+        text = model.transcribe(str(audio_file))['text']
+        # Also keep the legacy `result` property up to date.
+        self._result = text
+        result.text = text
+        return text
 
-    def on_setting(self, setting: Setting):
-        pass
+    def on_setting(self, key, value):
+        """Store value under key if key names a known Whisper setting."""
+        log.debug('key = %s, value = %s', key, value)
+        # The config dialog forwards every changed setting, so keys that
+        # are not fields of the settings dataclass are ignored.
+        if isinstance(key, str) and hasattr(self._config, key):
+            setattr(self._config, key, value)