First steps to multi model plugin

This commit is contained in:
mesonium
2024-06-12 19:57:25 +02:00
committed by hueso
parent 6667e01304
commit d510b74d3d
11 changed files with 690 additions and 172 deletions

View File

@@ -1,63 +0,0 @@
# This file is part of Gajim.
#
# Gajim is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Gajim is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations
from typing import Any
from typing import TYPE_CHECKING
from gi.repository import Gtk
from gajim.plugins.plugins_i18n import _
from gajim.gtk.settings import SettingsDialog
from gajim.gtk.const import Setting
from gajim.gtk.const import SettingKind
from gajim.gtk.const import SettingType
if TYPE_CHECKING:
from .stt_voice_messages import STTVoiceMessagesPlugin
class STTVoiceMessagesConfigDialog(SettingsDialog):
    """Modal settings dialog for the STT Voice Messages plugin.

    Each setting hands its new value to ``_on_setting`` together with the
    ``data`` string of that setting, which is used as the key in
    ``plugin.config``.
    """

    def __init__(self, plugin: STTVoiceMessagesPlugin, parent: Gtk.Window) -> None:
        """Build the settings list and open the dialog.

        Args:
            plugin: Plugin whose ``config`` mapping is read and written.
            parent: Window passed on to ``SettingsDialog`` as parent.
        """
        type_values = ('tiny', 'base', 'small', 'medium', 'large')

        self.plugin = plugin
        settings = [
            # The combo needs both a current value and a ``data`` key;
            # without ``data`` the callback would store under ``None``.
            Setting(SettingKind.COMBO,
                    _('Language Model'),
                    SettingType.VALUE,
                    self.plugin.config['model_size'],
                    callback=self._on_setting,
                    data='model_size',
                    props={'combo_items': type_values}),
            Setting(SettingKind.SWITCH,
                    _('Use Multilanguage Model'),
                    SettingType.VALUE,
                    self.plugin.config['use_multilanguage_model'],
                    callback=self._on_setting,
                    data='use_multilanguage_model'),
        ]

        SettingsDialog.__init__(self,
                                parent,
                                _('STT Voice Message Configuration'),
                                Gtk.DialogFlags.MODAL,
                                settings,
                                '')

    def _on_setting(self, value: Any, data: Any) -> None:
        """Store *value* in the plugin config under the key *data*."""
        self.plugin.config[data] = value

View File

View File

@@ -0,0 +1,185 @@
# This file is part of Gajim.
#
# Gajim is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Gajim is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations
import logging
from pathlib import Path
from typing import TYPE_CHECKING, Any
from gi.repository import Gtk
from gajim.common import app
from gajim.gtk.builder import get_builder
from gajim.gtk.const import Setting, SettingKind, SettingType
from gajim.gtk.settings import SettingsBox
from gajim.gtk.sidebar_switcher import SideBarSwitcher
from gajim.plugins.helpers import get_builder
from gajim.plugins.plugins_i18n import _
from .. import stt_voice_messages
from ..models import openai_whisper
if TYPE_CHECKING:
from .. import stt_voice_messages
log = logging.getLogger('gajim.p.stt_voice_messages_config')
################################################################################
# Helper
################################################################################
def check_module(module: str) -> bool:
    """Tell whether *module* can be imported.

    Import failures are not raised; they are written to the debug log
    and reported as ``False``.
    """
    try:
        __import__(module)
    except ImportError as ex:
        # ModuleNotFoundError is a subclass of ImportError and keeps its
        # own, shorter log message.
        if isinstance(ex, ModuleNotFoundError):
            log.debug('Could not find module %s', module)
        else:
            log.debug(str(ex))
        return False
    return True
################################################################################
# Plugin Settings
################################################################################
class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
    """Preferences window of the STT Voice Messages plugin.

    The layout is loaded from ``config_dialog.ui`` located next to this
    module. Each named container of that file listed in ``prefs`` gets
    one preference box added to it.
    """

    def __init__(self, plugin: stt_voice_messages.STTVoiceMessagesPlugin,
                 parent: Gtk.Window) -> None:
        """Create, populate and show the window.

        Args:
            plugin: Plugin whose ``config`` is written by ``_on_setting``.
            parent: NOTE(review): accepted but not used in this
                constructor — confirm whether the window should be made
                transient for it.
        """
        Gtk.ApplicationWindow.__init__(self)

        self.plugin = plugin

        self.set_application(app.app)
        self.set_position(Gtk.WindowPosition.CENTER)
        self.set_show_menubar(False)
        self.set_name('PreferencesWindow')
        self.set_default_size(900, 650)
        self.set_resizable(True)
        self.set_title(_('Preferences'))

        ui_path = Path(__file__).parent
        self._ui = get_builder(str(ui_path.resolve() / 'config_dialog.ui'))

        # Preference boxes created so far, keyed by their container id
        self._prefs: dict[str, PreferenceBox] = {}

        side_bar_switcher = SideBarSwitcher()
        side_bar_switcher.set_stack(self._ui.stack)
        self._ui.grid.attach(side_bar_switcher, 0, 0, 1, 1)

        self.add(self._ui.grid)

        # (container id in the .ui file, class to instantiate for it)
        prefs: list[tuple[str, type[PreferenceBox]]] = [
            ('stt_behaviour', STTBehaviour),
            ('models', Models),
            ('file_preview', FilePreview),
            ('whisper_general', openai_whisper.OpenAIWhisperGeneral),
        ]

        self._add_prefs(prefs)
        self.show_all()

    def _add_prefs(self, prefs: list[tuple[str, type[PreferenceBox]]]) -> None:
        """Instantiate every class in *prefs* and add it to its container.

        Entries whose container lookup yields ``None`` are skipped.
        """
        for ui_name, klass in prefs:
            pref_box = getattr(self._ui, ui_name)
            # Debug output goes through the module logger, not stdout
            log.debug('pref_box = %s', pref_box)
            if pref_box is None:
                continue

            pref = klass(self)  # pyright: ignore
            pref_box.add(pref)
            self._prefs[ui_name] = pref

    def _on_setting(self, value: Any, data: Any) -> None:
        """Store *value* in the plugin config under the key *data*."""
        self.plugin.config[data] = value
################################################################################
# Preference boxes
################################################################################
class PreferenceBox(SettingsBox):
    """SettingsBox with the styling used inside the preferences window."""

    def __init__(self, settings: list[Setting]) -> None:
        """Style the box, then add every entry of *settings* in order."""
        super().__init__(None)

        # Appearance and layout; these calls do not depend on each other
        self.set_selection_mode(Gtk.SelectionMode.NONE)
        self.set_valign(Gtk.Align.END)
        self.set_vexpand(False)
        self.get_style_context().add_class('border')

        for entry in settings:
            self.add_setting(entry)

        # Run once after all settings have been added
        self.update_states()
class STTBehaviour(PreferenceBox):
    """Preference box added to the ``stt_behaviour`` container.

    NOTE(review): its only setting targets ``show_main_window_on_startup``
    — presumably a placeholder until real STT options exist; confirm.
    """

    def __init__(self, *args: Any) -> None:
        """Build the box; positional arguments are accepted and ignored."""
        startup_choices = dict(
            always=_('Always'),
            never=_('Never'),
            last_state=_('Restore last state'),
        )

        show_on_startup = Setting(
            SettingKind.POPOVER,
            _('Show on Startup'),
            SettingType.CONFIG,
            'show_main_window_on_startup',
            props={'entries': startup_choices},
            desc=_('Show window when starting Gajim'),
        )

        super().__init__([show_on_startup])
class Models(PreferenceBox):
    """Preference box added to the ``models`` container.

    NOTE(review): its only setting targets ``show_main_window_on_startup``
    — presumably a placeholder until model options exist; confirm.
    """

    def __init__(self, *args: Any) -> None:
        """Build the box; positional arguments are accepted and ignored."""
        startup_choices = dict(
            always=_('Always'),
            never=_('Never'),
            last_state=_('Restore last state'),
        )

        show_on_startup = Setting(
            SettingKind.POPOVER,
            _('Show on Startup'),
            SettingType.CONFIG,
            'show_main_window_on_startup',
            props={'entries': startup_choices},
            desc=_('Show window when starting Gajim'),
        )

        super().__init__([show_on_startup])
class FilePreview(PreferenceBox):
    """Preference box added to the ``file_preview`` container.

    NOTE(review): its only setting targets ``show_main_window_on_startup``
    — presumably a placeholder until preview options exist; confirm.
    """

    def __init__(self, *args: Any) -> None:
        """Build the box; positional arguments are accepted and ignored."""
        startup_choices = dict(
            always=_('Always'),
            never=_('Never'),
            last_state=_('Restore last state'),
        )

        show_on_startup = Setting(
            SettingKind.POPOVER,
            _('Show on Startup'),
            SettingType.CONFIG,
            'show_main_window_on_startup',
            props={'entries': startup_choices},
            desc=_('Show window when starting Gajim'),
        )

        super().__init__([show_on_startup])

View File

@@ -0,0 +1,271 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Generated with glade 3.40.0 -->
<interface>
<requires lib="gtk+" version="3.20"/>
<!-- n-columns=3 n-rows=3 -->
<object class="GtkGrid" id="grid">
<property name="visible">True</property>
<property name="can-focus">False</property>
<child>
<object class="GtkStack" id="stack">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="hexpand">True</property>
<child>
<object class="GtkScrolledWindow">
<property name="visible">True</property>
<property name="can-focus">True</property>
<property name="hscrollbar-policy">never</property>
<property name="shadow-type">in</property>
<property name="overlay-scrolling">False</property>
<child>
<object class="GtkViewport">
<property name="visible">True</property>
<property name="can-focus">False</property>
<child>
<object class="GtkBox">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="orientation">vertical</property>
<property name="spacing">24</property>
<child>
<!-- n-columns=1 n-rows=1 -->
<object class="GtkGrid" id="stt_behaviour">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="orientation">vertical</property>
<property name="row-spacing">12</property>
<child>
<object class="GtkLabel">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="label" translatable="yes">Behaviour of STT Voice Messages</property>
<property name="xalign">0</property>
<style>
<class name="bold"/>
</style>
</object>
<packing>
<property name="left-attach">0</property>
<property name="top-attach">0</property>
</packing>
</child>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">0</property>
</packing>
</child>
<child>
<!-- n-columns=1 n-rows=1 -->
<object class="GtkGrid" id="models">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="orientation">vertical</property>
<property name="row-spacing">12</property>
<child>
<object class="GtkLabel">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="label" translatable="yes">General Model Configuration</property>
<property name="xalign">0</property>
<style>
<class name="bold"/>
</style>
</object>
<packing>
<property name="left-attach">0</property>
<property name="top-attach">0</property>
</packing>
</child>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">1</property>
</packing>
</child>
<child>
<!-- n-columns=1 n-rows=1 -->
<object class="GtkGrid" id="file_preview">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="orientation">vertical</property>
<property name="row-spacing">12</property>
<child>
<object class="GtkLabel">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="label" translatable="yes">Preview UI</property>
<property name="xalign">0</property>
<style>
<class name="bold"/>
</style>
</object>
<packing>
<property name="left-attach">0</property>
<property name="top-attach">0</property>
</packing>
</child>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">2</property>
</packing>
</child>
</object>
</child>
</object>
</child>
</object>
<packing>
<property name="name">general</property>
<property name="title" translatable="yes">General</property>
<property name="icon-name">computer-symbolic</property>
</packing>
</child>
<child>
<object class="GtkScrolledWindow">
<property name="visible">True</property>
<property name="can-focus">True</property>
<property name="hscrollbar-policy">never</property>
<property name="shadow-type">in</property>
<property name="overlay-scrolling">False</property>
<child>
<object class="GtkViewport">
<property name="visible">True</property>
<property name="can-focus">False</property>
<child>
<object class="GtkBox">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="orientation">vertical</property>
<property name="spacing">24</property>
<child>
<!-- n-columns=3 n-rows=3 -->
<object class="GtkGrid" id="whisper_general">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="orientation">vertical</property>
<property name="row-spacing">12</property>
<child>
<object class="GtkLabel">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="label" translatable="yes">General</property>
<property name="xalign">0</property>
<style>
<class name="bold"/>
</style>
</object>
<packing>
<property name="left-attach">0</property>
<property name="top-attach">0</property>
</packing>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">0</property>
</packing>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
</object>
</child>
</object>
</child>
</object>
<packing>
<property name="name">openai-whisper</property>
<property name="title" translatable="yes">openAI Whisper</property>
<property name="position">1</property>
</packing>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<style>
<class name="settings-stack"/>
</style>
</object>
<packing>
<property name="left-attach">1</property>
<property name="top-attach">0</property>
</packing>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
</object>
</interface>

View File

@@ -0,0 +1,65 @@
# This file is part of Gajim.
#
# Gajim is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Gajim is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
from pathlib import Path
from gi.repository import Gtk
from gajim.plugins.gajimplugin import GajimPluginConfig
from gajim.plugins.plugins_i18n import _
from .. import helper
class STTBox(Gtk.Box):
    """Vertical box holding a "Transcribe" button and a result label."""

    def __init__(self,
                 preview_audio_widget: Gtk.Box,
                 config: GajimPluginConfig,
                 audio_file: Path,
                 ) -> None:
        """Create the button and label and show them.

        Args:
            preview_audio_widget: Kept as ``_preview_audio``.
            config: Plugin configuration, kept as ``_config``.
            audio_file: Audio file this box belongs to.
        """
        Gtk.Box.__init__(self, orientation=Gtk.Orientation.VERTICAL, spacing=12)

        # Plain state
        self._audio_file = audio_file
        self._preview_audio = preview_audio_widget
        self._config = config
        self._model = None  # no transcription model is attached yet
        self._text = ''

        # Widgets
        self._transcribe_button = Gtk.Button(label=_('Transcribe'))
        self._transcription_label = Gtk.Label(label=_('Nothing transcribed yet'))
        self._transcription_label.set_max_width_chars(40)
        self._transcription_label.set_line_wrap(True)

        for widget in (self._transcribe_button, self._transcription_label):
            self.add(widget)

        self._transcribe_button.connect('clicked', self._on_transcribe_clicked)

        self.show_all()

    def _on_transcribe_clicked(self, _button: Gtk.Button):
        """Click handler of the "Transcribe" button; currently a no-op."""
        # TODO: start the transcription in the background, roughly:
        #   helper.BackgroundTask(<callable running self._model.transcribe>,
        #                         self._show_result).start()
        return None

    def _show_result(self):
        """Put the stripped transcription, or a fallback text, on the label."""
        heard = self._text.strip()
        shown = heard if heard != '' else _('_Have not heard any word!_')
        self._transcription_label.set_text(shown)

View File

@@ -0,0 +1,65 @@
# This file is part of Gajim.
#
# Gajim is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Gajim is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
from gi.repository import Gio, GObject
'''
https://discourse.gnome.org/t/gtk-threading-problem-with-glib-idle-add/13597/5
https://github.com/gdm-settings/gdm-settings/blob/f245d3000200fa6be2a35c7f6ac45b131dadb5d6/src/utils.py#L116..L162
'''
class AlreadyRunningError(Exception):
    """Raised by ``BackgroundTask.start()`` while a run is still pending."""


class InvalidGioTaskError(Exception):
    """Raised by ``BackgroundTask.finish()`` when no valid task is stored."""


class BackgroundTask(GObject.Object):
    """Run a callable in a worker thread by means of ``Gio.Task``.

    ``start()`` launches ``function`` in a thread; once the task is done
    ``finish_callback`` is called without arguments. ``finish()`` hands
    out the result (or re-raises the exception caught in the thread) and
    is the only place where the running state is cleared, so it has to
    be called before the same instance can be started again.
    """

    __gtype_name__ = 'BackgroundTask'

    def __init__(self, function, finish_callback, **kwargs):
        """Store the work to do.

        Args:
            function: Callable without arguments, executed in the thread.
            finish_callback: Callable without arguments, invoked when the
                task has completed.
            **kwargs: Passed on to ``GObject.Object``.
        """
        super().__init__(**kwargs)
        self.function = function
        self.finish_callback = finish_callback
        self._current = None

    def start(self):
        """Launch ``function`` in a thread.

        Raises:
            AlreadyRunningError: A previous run has not been finished.
        """
        if self._current:
            raise AlreadyRunningError('Task is already running')

        def _on_ready(source, _task, _user_data):
            # Gio passes (source object, task, user data); only the
            # user-supplied callback of the source object is of interest.
            source.finish_callback()

        task = Gio.Task.new(self, None, _on_ready, None)
        task.run_in_thread(self._thread_cb)

        self._current = task

    @staticmethod
    def _thread_cb(task, self, task_data, cancellable):
        """Thread body: run ``function`` and store its outcome in *task*.

        An exception is stored as the task's value instead of being
        raised here; ``finish()`` re-raises it.
        """
        try:
            retval = self.function()
            task.return_value(retval)
        except Exception as e:
            task.return_value(e)

    def finish(self):
        """Return the result of the last run and clear the running state.

        Raises:
            InvalidGioTaskError: The stored task is not valid for this
                object.
            Exception: Whatever ``function`` raised in the thread.
        """
        task = self._current
        self._current = None

        if not Gio.Task.is_valid(task, self):
            raise InvalidGioTaskError()

        value = task.propagate_value().value

        if isinstance(value, Exception):
            raise value

        return value

View File

View File

@@ -0,0 +1,92 @@
# This file is part of Gajim.
#
# Gajim is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Gajim is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
import typing
from pathlib import Path
from typing import Any
from gajim.gtk.const import Setting, SettingKind, SettingType
from gajim.gtk.preferences import PreferenceBox
from gajim.plugins.plugins_i18n import _
# Optional dependency: record whether the ``whisper`` package is usable so
# the flag is defined on both paths.
try:
    import whisper
    OPENAI_WHISPER_AVAILABLE = True
except ModuleNotFoundError:
    OPENAI_WHISPER_AVAILABLE = False
    if typing.TYPE_CHECKING:
        import whisper
class WhisperModel:
    """Speech-to-text backend using the ``whisper`` package."""

    def __init__(self) -> None:
        # Selectable model sizes, in the order shown in the settings combo
        self._model_sizes = ['tiny', 'base', 'small', 'medium', 'large']
        self._multilanguage = True
        self._config = {
            'model_size': 'tiny'
        }

    def transcribe(self, audio_file: Path) -> str:
        """Transcribe *audio_file* and return the recognised text.

        The model of the configured size is loaded on every call.
        """
        model = whisper.load_model(self._config['model_size'])
        # whisper documents str / ndarray / Tensor as audio input, so the
        # path is handed over as a plain string.
        result = model.transcribe(str(audio_file))
        return result["text"]

    def _build_config(self) -> list[Setting]:
        """Return the settings describing this model's options."""
        settings = [
            Setting(SettingKind.COMBO,
                    _('Language Model'),
                    SettingType.VALUE,
                    self._config['model_size'],
                    callback=self._on_setting,
                    data='model_size',
                    props={'combo_items': tuple(self._model_sizes)}),
            Setting(SettingKind.SWITCH,
                    _('Use Multilanguage Model'),
                    SettingType.VALUE,
                    self._multilanguage,
                    callback=self._on_setting,
                    data='use_multilanguage_model'),
        ]
        return settings

    def _on_setting(self, value: Any, data: Any = None) -> None:
        """Apply a changed setting.

        Args:
            value: New value of the setting.
            data: ``data`` string of the setting that changed. Without it
                the call is a no-op, which keeps one-argument calls
                working.
        """
        if data is None:
            return
        if data == 'use_multilanguage_model':
            self._multilanguage = bool(value)
        else:
            self._config[data] = value
################################################################################
# Whisper Settings UI
################################################################################
class OpenAIWhisperGeneral(PreferenceBox):
    """Preference box with the general openAI Whisper options."""

    def __init__(self, *args: Any) -> None:
        """Build the box; positional arguments are accepted and ignored."""
        # Keys use the same size names as the rest of the plugin
        # ('tiny', 'base', 'small', 'medium', 'large'), smallest first.
        model_sizes = {
            'tiny': _('Tiny'),
            'base': _('Base'),
            'small': _('Small'),
            'medium': _('Medium'),
            'large': _('Large'),
        }

        settings = [
            # NOTE(review): no value and no callback are set, so a
            # selection is not stored anywhere yet — confirm intended.
            Setting(SettingKind.POPOVER,
                    _('Language Model'),
                    SettingType.VALUE,
                    callback=None,
                    props={'entries': model_sizes},
                    desc=_('Model Size')),
        ]

        PreferenceBox.__init__(self, settings)

View File

@@ -13,7 +13,7 @@
"win32" "win32"
], ],
"requirements": [ "requirements": [
"gajim>=1.4.0" "gajim>=1.9.0"
], ],
"short_name": "stt_voice_messages", "short_name": "stt_voice_messages",
"version": "0.0.1" "version": "0.0.1"

View File

Before

Width:  |  Height:  |  Size: 371 B

After

Width:  |  Height:  |  Size: 371 B

View File

@@ -19,30 +19,25 @@ import logging
from functools import partial from functools import partial
from pathlib import Path from pathlib import Path
import whisper from gi.repository import Gtk
from gi.repository import Gio, GObject, Gtk
from stt_voice_messages.config_dialog import STTVoiceMessagesConfigDialog
from gajim.plugins import GajimPlugin from gajim.plugins import GajimPlugin
from gajim.plugins.plugins_i18n import _ from gajim.plugins.plugins_i18n import _
from .gtk import config_dialog, sttbox
log = logging.getLogger('gajim.p.stt_voice_messages') log = logging.getLogger('gajim.p.stt_voice_messages')
class STTVoiceMessagesPlugin(GajimPlugin): class STTVoiceMessagesPlugin(GajimPlugin):
def init(self) -> None: def init(self) -> None:
self.description = _('Transcribes voice messages to text.') self.description = _('Transcribes voice messages to text.')
self.config_dialog = partial(STTVoiceMessagesConfigDialog, self) self.config_dialog = partial(config_dialog.STTVoiceMessagesConfigDialog, self)
self.gui_extension_points = { self.gui_extension_points = {
'preview_audio': (self._preview_audio_created, None), 'preview_audio': (self._preview_audio_created, None),
} }
self.config_default_values = {
'use_multilanguage_model': (True, ''),
'model_size': ('base', '')
}
self._audio_file = None self._audio_file = None
self._preview_audio_widget = None self._preview_audio_widget = None
self._stt_box = None self._stt_box = None
@@ -53,105 +48,13 @@ class STTVoiceMessagesPlugin(GajimPlugin):
) -> None: ) -> None:
self._preview_audio_widget = preview_audio_widget self._preview_audio_widget = preview_audio_widget
self._audio_file = audio_file.as_posix() self._audio_file = audio_file.as_posix()
self._create_stt_box() #self._create_stt_box()
def _create_stt_box(self) -> None: #def _create_stt_box(self) -> None:
assert self._preview_audio_widget is not None # assert self._preview_audio_widget is not None
self._stt_box = STTBox(self._preview_audio_widget, # self._stt_box = sttbox.STTBox(self._preview_audio_widget,
self.config, # self.config,
self._audio_file) # self._audio_file)
self._preview_audio_widget.pack_end(self._stt_box, False, False, 0) # self._preview_audio_widget.pack_end(self._stt_box, False, False, 0)
class STTBox(Gtk.Box):
    """Vertical box with a "Transcribe" button and a label for the result.

    A click runs the whisper transcription in a ``BackgroundTask`` and
    shows the text on the label once the task reports completion.
    """

    def __init__(self,
                 preview_audio_widget: Gtk.Box,
                 config: GajimPluginConfig,
                 audio_file: Path,
                 ) -> None:
        """Create the button and label and show them.

        Args:
            preview_audio_widget: Kept as ``_preview_audio``.
            config: Plugin configuration; ``model_size`` is read from it.
            audio_file: Audio file handed to whisper.
        """
        Gtk.Box.__init__(self, orientation=Gtk.Orientation.VERTICAL, spacing=12)

        self._config = config
        self._preview_audio = preview_audio_widget
        self._audio_file = audio_file
        self._text = ''

        self._transcribe_button = Gtk.Button(label=_('Transcribe'))
        self._transcription_label = Gtk.Label(label=_('Nothing transcribed yet'))
        self._transcription_label.set_max_width_chars(40)
        self._transcription_label.set_line_wrap(True)

        self.add(self._transcribe_button)
        self.add(self._transcription_label)

        self._transcribe_button.connect('clicked', self._on_transcribe_clicked)

        self.show_all()

    def _on_transcribe_clicked(self, _button: Gtk.Button):
        """Start the transcription as a background task."""
        transcription_task = BackgroundTask(
            self._trascribe_by_whisper,
            self._show_result
        )
        transcription_task.start()

    def _show_result(self):
        """Put the stripped transcription, or a fallback text, on the label."""
        if self._text.strip() != '':
            self._transcription_label.set_text(self._text.strip())
        else:
            self._transcription_label.set_text(_('_Have not heard any word!_'))

    def _trascribe_by_whisper(self) -> str:
        """Transcribe the audio file, remember the text and return it."""
        model = whisper.load_model(self._config['model_size'])
        result = model.transcribe(self._audio_file)
        self._text = result["text"]
        # The stored text is the result; a bare ``text`` name does not exist
        return self._text
'''
https://discourse.gnome.org/t/gtk-threading-problem-with-glib-idle-add/13597/5
https://github.com/gdm-settings/gdm-settings/blob/f245d3000200fa6be2a35c7f6ac45b131dadb5d6/src/utils.py#L116..L162
'''
class AlreadyRunningError(Exception):
    """Raised by ``BackgroundTask.start()`` while a run is still pending."""


class InvalidGioTaskError(Exception):
    """Raised by ``BackgroundTask.finish()`` when no valid task is stored."""


class BackgroundTask(GObject.Object):
    """Run a callable in a worker thread by means of ``Gio.Task``.

    ``start()`` launches ``function`` in a thread; once the task is done
    ``finish_callback`` is called without arguments. ``finish()`` returns
    the result (or re-raises the exception caught in the thread) and is
    the only place where the running state is cleared.
    """

    __gtype_name__ = 'BackgroundTask'

    def __init__(self, function, finish_callback, **kwargs):
        """Store the work to do.

        Args:
            function: Callable without arguments, executed in the thread.
            finish_callback: Callable without arguments, invoked when the
                task has completed.
            **kwargs: Passed on to ``GObject.Object``.
        """
        super().__init__(**kwargs)
        self.function = function
        self.finish_callback = finish_callback
        self._current = None

    def start(self):
        """Launch ``function`` in a thread.

        Raises:
            AlreadyRunningError: A previous run has not been finished.
        """
        if self._current:
            raise AlreadyRunningError('Task is already running')

        def _on_ready(source, _task, _user_data):
            # Gio passes (source object, task, user data); forward to the
            # user-supplied callback of the source object.
            source.finish_callback()

        task = Gio.Task.new(self, None, _on_ready, None)
        task.run_in_thread(self._thread_cb)

        self._current = task

    @staticmethod
    def _thread_cb(task, self, task_data, cancellable):
        """Thread body: run ``function`` and store its outcome in *task*.

        An exception is stored as the task's value instead of being
        raised here; ``finish()`` re-raises it.
        """
        try:
            retval = self.function()
            task.return_value(retval)
        except Exception as e:
            task.return_value(e)

    def finish(self):
        """Return the result of the last run and clear the running state.

        Raises:
            InvalidGioTaskError: The stored task is not valid for this
                object.
            Exception: Whatever ``function`` raised in the thread.
        """
        task = self._current
        self._current = None

        if not Gio.Task.is_valid(task, self):
            raise InvalidGioTaskError()

        value = task.propagate_value().value

        if isinstance(value, Exception):
            raise value

        return value