Some more work on multi models

This commit is contained in:
mesonium
2024-06-14 19:53:26 +02:00
committed by hueso
parent d510b74d3d
commit 2ac06ad8a5
6 changed files with 112 additions and 84 deletions

View File

@@ -0,0 +1,21 @@
from dataclasses import dataclass, field
from whisper import available_models
from gajim.common.app import Any
from .models.model import Model
@dataclass
class PluginConfig:
    """Default plugin settings, grouped into one dictionary per section.

    Each section is created through a ``default_factory``, so every
    instance gets its own dictionaries and edits are never shared
    between instances.
    """

    # 'general' section: both entries start out unset (None).
    general: dict[str, Any] = field(
        default_factory=lambda: dict(model=None, auto_transcribe=None))

    # 'openaiwhisper' section: defaults to the 'tiny' model size with
    # the multilingual model enabled.
    openaiwhisper: dict[str, Any] = field(
        default_factory=lambda: dict(model_size='tiny',
                                     multilingual_model=True))

View File

@@ -20,6 +20,7 @@ from pathlib import Path
from typing import TYPE_CHECKING, Any from typing import TYPE_CHECKING, Any
from gi.repository import Gtk from gi.repository import Gtk
import whisper
from gajim.common import app from gajim.common import app
from gajim.gtk.builder import get_builder from gajim.gtk.builder import get_builder
@@ -29,17 +30,19 @@ from gajim.gtk.sidebar_switcher import SideBarSwitcher
from gajim.plugins.helpers import get_builder from gajim.plugins.helpers import get_builder
from gajim.plugins.plugins_i18n import _ from gajim.plugins.plugins_i18n import _
from .. import stt_voice_messages
from ..models import openai_whisper from ..models import openai_whisper
from ..configs import *
if TYPE_CHECKING: if TYPE_CHECKING:
from .. import stt_voice_messages from .. import stt_voice_messages
log = logging.getLogger('gajim.p.stt_voice_messages_config') log = logging.getLogger('gajim.p.stt_voice_messages_config')
################################################################################ ################################################################################
# Helper # Helper
################################################################################ ################################################################################
def check_module(module: str) -> bool: def check_module(module: str) -> bool:
try: try:
__import__(module) __import__(module)
@@ -68,7 +71,7 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
self.set_name('PreferencesWindow') self.set_name('PreferencesWindow')
self.set_default_size(900, 650) self.set_default_size(900, 650)
self.set_resizable(True) self.set_resizable(True)
self.set_title(_('Preferences')) self.set_title(_('STT Voice Messages - Preferences'))
ui_path = Path(__file__).parent ui_path = Path(__file__).parent
self._ui = get_builder(str(ui_path.resolve() / 'config_dialog.ui')) self._ui = get_builder(str(ui_path.resolve() / 'config_dialog.ui'))
@@ -83,7 +86,7 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
('stt_behaviour', STTBehaviour), ('stt_behaviour', STTBehaviour),
('models', Models), ('models', Models),
('file_preview', FilePreview), ('file_preview', FilePreview),
('whisper_general', openai_whisper.OpenAIWhisperGeneral), ('whisper_general', OpenAIWhisperGeneral),
] ]
self._add_prefs(prefs) self._add_prefs(prefs)
@@ -92,21 +95,18 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
def _add_prefs(self, prefs: list[tuple[str, type[PreferenceBox]]]): def _add_prefs(self, prefs: list[tuple[str, type[PreferenceBox]]]):
for ui_name, klass in prefs: for ui_name, klass in prefs:
pref_box = getattr(self._ui, ui_name) pref_box = getattr(self._ui, ui_name)
print('pref_box = ', pref_box)
if pref_box is None:
continue
pref = klass(self) # pyright: ignore pref = klass(self) # pyright: ignore
print("pref = ", pref)
pref_box.add(pref) pref_box.add(pref)
self._prefs[ui_name] = pref self._prefs[ui_name] = pref
def _on_setting(self, value: Any, data: Any) -> None: def _on_setting(self, value: Any, data: Any) -> None:
if isinstance(value, str):
value.strip()
self.plugin.config[data] = value self.plugin.config[data] = value
self.plugin.update()
################################################################################
# Preference boxes
################################################################################
class PreferenceBox(SettingsBox): class PreferenceBox(SettingsBox):
def __init__(self, settings: list[Setting]) -> None: def __init__(self, settings: list[Setting]) -> None:
SettingsBox.__init__(self, None) SettingsBox.__init__(self, None)
@@ -120,9 +120,12 @@ class PreferenceBox(SettingsBox):
self.update_states() self.update_states()
################################################################################
# General Preferences
################################################################################
class STTBehaviour(PreferenceBox): class STTBehaviour(PreferenceBox):
def __init__(self, *args: Any) -> None: def __init__(self, *args: Any) -> None:
main_window_on_startup_items = { main_window_on_startup_items = {
'always': _('Always'), 'always': _('Always'),
'never': _('Never'), 'never': _('Never'),
@@ -143,7 +146,6 @@ class STTBehaviour(PreferenceBox):
class Models(PreferenceBox): class Models(PreferenceBox):
def __init__(self, *args: Any) -> None: def __init__(self, *args: Any) -> None:
main_window_on_startup_items = { main_window_on_startup_items = {
'always': _('Always'), 'always': _('Always'),
'never': _('Never'), 'never': _('Never'),
@@ -164,7 +166,6 @@ class Models(PreferenceBox):
class FilePreview(PreferenceBox): class FilePreview(PreferenceBox):
def __init__(self, *args: Any) -> None: def __init__(self, *args: Any) -> None:
main_window_on_startup_items = { main_window_on_startup_items = {
'always': _('Always'), 'always': _('Always'),
'never': _('Never'), 'never': _('Never'),
@@ -183,3 +184,27 @@ class FilePreview(PreferenceBox):
PreferenceBox.__init__(self, settings) PreferenceBox.__init__(self, settings)
################################################################################
# Whisper Settings UI
################################################################################
class OpenAIWhisperGeneral(PreferenceBox):
    """Preference box holding the general OpenAI Whisper settings.

    It currently offers a single popover for choosing the model size.
    """

    def __init__(self, *args: Any) -> None:
        """Build the settings list and pass it to ``PreferenceBox``.

        ``*args`` is accepted but not used by this box.
        """
        # NOTE(review): this is the section of a freshly created
        # PluginConfig, so it always starts from the defaults and the
        # changes made in _on_setting() live only on this box instance.
        # Confirm whether it should be backed by the plugin's stored
        # configuration instead.
        self.config = PluginConfig().openaiwhisper

        settings = [
            Setting(SettingKind.POPOVER,
                    _('Language Model Size'),
                    SettingType.VALUE,
                    value=str(self.config['model_size']),
                    data='model_size',
                    callback=self._on_setting,
                    props={'entries': whisper.available_models()}),
        ]

        PreferenceBox.__init__(self, settings)

    def _on_setting(self, value: Any, data: Any) -> None:
        """Store *value* under the key *data* in this box's config.

        The change is reported through the module logger at debug level
        rather than printed to stdout.
        """
        log.debug('Whisper setting %r changed: %r -> %r',
                  data, self.config.get(data), value)
        self.config[data] = value

View File

@@ -12,21 +12,18 @@
# #
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with Gajim. If not, see <http://www.gnu.org/licenses/>. # along with Gajim. If not, see <http://www.gnu.org/licenses/>.
from pathlib import Path
from gi.repository import Gtk from gi.repository import Gtk
from gajim.plugins.gajimplugin import GajimPluginConfig from gajim.plugins.gajimplugin import GajimPluginConfig
from gajim.plugins.plugins_i18n import _ from gajim.plugins.plugins_i18n import _
from .. import helper
class STTBox(Gtk.Box): class STTBox(Gtk.Box):
def __init__(self, def __init__(self,
preview_audio_widget: Gtk.Box, preview_audio_widget: Gtk.Box,
config: GajimPluginConfig, config: GajimPluginConfig,
audio_file: Path, audio_file: str,
) -> None: ) -> None:
Gtk.Box.__init__(self, orientation=Gtk.Orientation.VERTICAL, spacing=12) Gtk.Box.__init__(self, orientation=Gtk.Orientation.VERTICAL, spacing=12)
@@ -39,7 +36,8 @@ class STTBox(Gtk.Box):
self._transcribe_button = Gtk.Button(label=_('Transcribe')) self._transcribe_button = Gtk.Button(label=_('Transcribe'))
self._transcription_label = Gtk.Label(label=_('Nothing transcribed yet')) self._transcription_label = Gtk.Label(
label=_('Nothing transcribed yet'))
self._transcription_label.set_max_width_chars(40) self._transcription_label.set_max_width_chars(40)
self._transcription_label.set_line_wrap(True) self._transcription_label.set_line_wrap(True)

View File

@@ -0,0 +1,30 @@
# This file is part of Gajim.
#
# Gajim is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Gajim is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
from abc import ABC, abstractmethod
from pathlib import Path
from gajim.gtk.const import Setting
class Model(ABC):
    """Abstract base class for the plugin's transcription models.

    Subclasses have to override both methods before they can be
    instantiated.
    """

    @abstractmethod
    def transcribe(self, audio_file: Path) -> str:
        """Return the text transcribed from *audio_file*.

        The base implementation returns an empty string.
        """
        return str()

    @abstractmethod
    def on_setting(self, setting: Setting):
        """Handle *setting*; the base implementation does nothing."""

View File

@@ -15,11 +15,10 @@
import typing import typing
from pathlib import Path from pathlib import Path
from typing import Any
from gajim.gtk.const import Setting, SettingKind, SettingType from .model import Model
from gajim.gtk.preferences import PreferenceBox
from gajim.plugins.plugins_i18n import _ from gajim.gtk.const import Setting
try: try:
import whisper import whisper
@@ -29,7 +28,7 @@ except ModuleNotFoundError:
import whisper import whisper
class WhisperModel: class WhisperModel(Model):
def __init__(self): def __init__(self):
self._model_sizes = ['tiny', 'small', 'base', 'medium', 'large'] self._model_sizes = ['tiny', 'small', 'base', 'medium', 'large']
self._multilanguage = True self._multilanguage = True
@@ -43,50 +42,6 @@ class WhisperModel:
result = model.transcribe(audio_file) result = model.transcribe(audio_file)
return result["text"] return result["text"]
def _build_config(self) -> list[Setting]: def on_setting(self, setting: Setting):
whisper_model_sizes = ('tiny', 'base', 'small', 'medium', 'large')
settings = [
Setting(SettingKind.COMBO,
_('Language Model'),
SettingType.VALUE,
callback=self._on_setting,
props={'combo_items': whisper_model_sizes}),
Setting(SettingKind.SWITCH,
_('Use Multilanguage Model'),
SettingType.VALUE,
self._multilanguage,
callback=self._on_setting,
data='use_multilanguage_model'),
]
return settings
def _on_setting(self, setting: Setting):
pass pass
################################################################################
# Whisper Settings UI
################################################################################
class OpenAIWhisperGeneral(PreferenceBox):
def __init__(self, *args: Any) -> None:
model_sizes = {
'tiny': _('Tiny'),
'small': _('Small'),
'basic': _('Basic'),
'medium': _('Medium'),
'large': _('Large'),
}
settings = [
Setting(SettingKind.POPOVER,
_('Language Model'),
SettingType.VALUE,
callback=None,
props={'entries': model_sizes},
desc=_('Model Size')),
]
PreferenceBox.__init__(self, settings)

View File

@@ -32,29 +32,28 @@ log = logging.getLogger('gajim.p.stt_voice_messages')
class STTVoiceMessagesPlugin(GajimPlugin): class STTVoiceMessagesPlugin(GajimPlugin):
def init(self) -> None: def init(self) -> None:
self.description = _('Transcribes voice messages to text.') self.description = _('Transcribes voice messages to text.')
self.config_dialog = partial(config_dialog.STTVoiceMessagesConfigDialog, self) self.config_dialog = partial(config_dialog.STTVoiceMessagesConfigDialog,
self)
self.gui_extension_points = { self.gui_extension_points = {
'preview_audio': (self._preview_audio_created, None), 'preview_audio': (self._on_preview_audio_created, None),
} }
self._audio_file = None self._audio_file: str = ''
self._preview_audio_widget = None self._preview_audio_widget = None
self._stt_box = None self._stt_box = None
def _preview_audio_created(self, def _on_preview_audio_created(self,
preview_audio_widget: Gtk.Box, preview_audio_widget: Gtk.Box,
audio_file: Path audio_file: Path
) -> None: ) -> None:
self._preview_audio_widget = preview_audio_widget self._preview_audio_widget = preview_audio_widget
self._audio_file = audio_file.as_posix() self._audio_file = audio_file.as_posix()
#self._create_stt_box() self._create_stt_box()
#def _create_stt_box(self) -> None:
# assert self._preview_audio_widget is not None
# self._stt_box = sttbox.STTBox(self._preview_audio_widget,
# self.config,
# self._audio_file)
# self._preview_audio_widget.pack_end(self._stt_box, False, False, 0)
def _create_stt_box(self) -> None:
    """Create the STT box and pack it at the end of the preview widget.

    Requires ``self._preview_audio_widget`` to be set; the new box is
    kept in ``self._stt_box``.
    """
    preview_widget = self._preview_audio_widget
    assert preview_widget is not None

    stt_box = sttbox.STTBox(preview_widget, self.config, self._audio_file)
    self._stt_box = stt_box

    # pack_end(child, expand, fill, padding)
    preview_widget.pack_end(stt_box, False, False, 0)