WIP: parakeet
This commit is contained in:
@@ -18,33 +18,23 @@ from __future__ import annotations
|
||||
import logging
|
||||
import typing
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
try:
|
||||
import whisper
|
||||
import onnx_asr
|
||||
except ModuleNotFoundError:
|
||||
if typing.TYPE_CHECKING:
|
||||
import whisper
|
||||
import onnx_asr
|
||||
|
||||
try:
|
||||
import faster_whisper as fwhisper
|
||||
except ModuleNotFoundError:
|
||||
if typing.TYPE_CHECKING:
|
||||
import faster_whisper as fwhisper
|
||||
from gi.repository import Adw, Gtk
|
||||
|
||||
from gi.repository import Gtk
|
||||
|
||||
from gajim.common import app
|
||||
from gajim.common.app import Any
|
||||
from gajim.gtk.builder import get_builder
|
||||
from gajim.gtk.const import Setting, SettingKind, SettingType
|
||||
from gajim.gtk.settings import SettingsBox
|
||||
from gajim.gtk.sidebar_switcher import SideBarSwitcher
|
||||
from gajim.plugins.helpers import get_builder
|
||||
from gajim.gtk.filechoosers import Filter
|
||||
from gajim.gtk.settings import GajimPreferencesGroup, SettingsDialog
|
||||
from gajim.plugins.plugins_i18n import _
|
||||
|
||||
from ..models import faster_whisper, openai_whisper
|
||||
from ..models.model_settings import *
|
||||
from ..models import stt
|
||||
from ..models.model_settings import OnnxAsrSettings
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..stt_voice_messages import STTVoiceMessagesPlugin
|
||||
@@ -52,271 +42,250 @@ if TYPE_CHECKING:
|
||||
log = logging.getLogger('gajim.p.sttvm_config_dialog')
|
||||
|
||||
|
||||
@dataclass
|
||||
class Model:
|
||||
name: str
|
||||
required_moduls: list[str]
|
||||
klass: object
|
||||
config: Any
|
||||
instance: typing.Optional[object] = None
|
||||
|
||||
|
||||
SUPPORTED_MODELS: dict[str, Model] = {
|
||||
'model_openaiwhisper': Model('OpenAI Whisper',
|
||||
['whisper'],
|
||||
openai_whisper.WhisperModel,
|
||||
OpenAIWhisperSettings),
|
||||
'model_faster-whisper': Model('Faster-Whisper',
|
||||
['faster_whisper'],
|
||||
faster_whisper.FasterWhisperModel,
|
||||
FasterWhisperSettings)
|
||||
}
|
||||
|
||||
|
||||
class Configuration:
|
||||
def __init__(self, plugin: STTVoiceMessagesPlugin):
|
||||
self._plugin = plugin
|
||||
|
||||
self._available_models: dict[str, Model] = {}
|
||||
self.check_available_moduls()
|
||||
|
||||
log.debug('config = %s', self._plugin.config)
|
||||
self._instance = None
|
||||
self._main_model_row = None
|
||||
self._preset_model_picker = None
|
||||
self._custom_model_id_entry = None
|
||||
self._local_model_file_picker = None
|
||||
self._status_group = None
|
||||
self._model_data: dict[str, str] = {}
|
||||
self._instance = stt.OnnxAsrModel()
|
||||
self._instance.set_config(OnnxAsrSettings(
|
||||
model_id=self.plugin.config['model_id'],
|
||||
model_path=self.plugin.config['model_path']
|
||||
))
|
||||
self._model_data = self._steal_model_list()
|
||||
|
||||
@property
|
||||
def plugin(self) -> STTVoiceMessagesPlugin:
|
||||
return self._plugin
|
||||
|
||||
@property
|
||||
def available_models(self) -> dict[str, Model]:
|
||||
return self._available_models
|
||||
def is_available(self) -> bool:
    """Report whether a speech-to-text backend instance is currently held.

    Returns:
        True when ``self._instance`` is set, False when it is ``None``.
    """
    backend = self._instance
    return backend is not None
|
||||
|
||||
def unload_model(self) -> None:
    """Ask the held backend instance, if there is one, to unload right away.

    Nothing happens when ``self._instance`` is ``None``.
    """
    backend = self._instance
    if backend is None:
        return
    backend.unload_now()
|
||||
|
||||
def _steal_model_list(self) -> dict[str, str]:
    """Build the model-choice mapping from ``onnx_asr.load_model`` type hints.

    Returns:
        A dict mapping every string found two ``typing.get_args`` levels
        below the ``model`` parameter annotation to itself.  Empty when the
        annotation is missing or carries no such strings.
    """
    # UGLY: Extract available model choices from onnx_asr type hints.
    annotation = onnx_asr.load_model.__annotations__.get('model')

    choices: dict[str, str] = {}
    # presumably the annotation is a union whose members are Literal[...]
    # types listing model names -- TODO confirm against onnx_asr
    for member in typing.get_args(annotation):
        for candidate in typing.get_args(member):
            if isinstance(candidate, str):
                choices[candidate] = candidate
    return choices
|
||||
|
||||
def on_setting(self, value: Any, data: Any) -> None:
|
||||
if isinstance(value, str):
|
||||
value.strip()
|
||||
|
||||
log.debug('plugin config before:\n %s', self.plugin.config.data)
|
||||
value = value.strip()
|
||||
self.plugin.config[data] = value
|
||||
log.debug('plugin config after:\n %s', self.plugin.config.data)
|
||||
|
||||
def on_config_model(self, model: str, value: Any, data: Any) -> None:
|
||||
if isinstance(value, str):
|
||||
value.strip()
|
||||
def on_preset_changed(self, value: str, data: Any) -> None:
    """Handle a change of the preset-model selection.

    Args:
        value: Key of the newly selected preset.
        data: Callback payload; not used here.

    The preset is written as the model ID only while the custom-ID entry is
    unset or holds nothing but whitespace.  The status display is refreshed
    in every case.
    """
    custom_entry = self._custom_model_id_entry
    if custom_entry is None:
        custom_text = ''
    else:
        custom_text = custom_entry.entry.get_text().strip()

    # A non-empty custom entry overrides the preset, so the change is ignored.
    if not custom_text:
        self._write_model_id(value)
    self._update_model_status()
|
||||
|
||||
log.debug('plugin config before:\n %s', self.plugin.config.data[model])
|
||||
setattr(self.plugin.config.data[model], data, value)
|
||||
log.debug('plugin config after:\n %s', self.plugin.config.data[model])
|
||||
def on_custom_model_id_changed(self, value: str, data: Any) -> None:
    """Handle edits of the custom model-ID entry.

    Args:
        value: Current entry text; surrounding whitespace is ignored.
        data: Callback payload; not used here.

    Non-empty text becomes the model ID.  When the text is empty, the key
    currently selected in the preset picker is written instead, provided a
    picker exists and reports a key other than ``None``.  Row sensitivity
    and the status display are refreshed afterwards.
    """
    stripped = value.strip()
    target = stripped or None

    # Fall back to the preset selection once the custom entry is cleared.
    if target is None and self._preset_model_picker is not None:
        target = self._preset_model_picker._dropdown.get_selected_key()

    if target is not None:
        self._write_model_id(target)

    self._apply_sensitivity_state()
    self._update_model_status()
|
||||
|
||||
self._plugin.config.data[model].instance.set_config(self.plugin.config.data[model])
|
||||
def on_model_file_picked(self, value: str, data: Any) -> None:
    """Handle a selection made in the local model file chooser.

    Args:
        value: Path of the chosen file, or an empty value for no selection.
        data: Callback payload; not used here.

    The directory containing the chosen file is stored as the model path
    (an empty string when nothing is chosen).  Row sensitivity and the
    status display are refreshed afterwards.
    """
    directory = ''
    if value:
        # The parent directory is stored, not the picked file itself.
        directory = str(Path(value).parent)

    self._write_model_path(directory)
    self._apply_sensitivity_state()
    self._update_model_status()
|
||||
|
||||
def create_model(self, model: Any) -> None:
|
||||
if (self.plugin.config.data[model].instance is None and
|
||||
self._available_models[model].klass is not None):
|
||||
self.plugin.config.data[model].instance = \
|
||||
self._available_models[model].klass()
|
||||
def _write_model_id(self, model_id: str) -> None:
|
||||
if self.plugin.config['model_id'] == model_id:
|
||||
return
|
||||
self.plugin.config['model_id'] = model_id
|
||||
if self._instance is not None:
|
||||
self._instance.set_config(OnnxAsrSettings(
|
||||
model_id=self.plugin.config['model_id'],
|
||||
model_path=self.plugin.config['model_path']
|
||||
))
|
||||
|
||||
def _write_model_path(self, model_path: str) -> None:
|
||||
if self.plugin.config['model_path'] == model_path:
|
||||
return
|
||||
self.plugin.config['model_path'] = model_path
|
||||
if self._instance is not None:
|
||||
self._instance.set_config(OnnxAsrSettings(
|
||||
model_id=self.plugin.config['model_id'],
|
||||
model_path=self.plugin.config['model_path']
|
||||
))
|
||||
|
||||
def sync_model_path_from_widget(self) -> None:
    """Copy the file chooser's current selection into the stored model path.

    Does nothing when no file-chooser row is registered.  Otherwise the
    parent directory of the path reported by the row's activatable widget
    is written, or an empty string when the widget reports no path.
    """
    picker = self._local_model_file_picker
    if picker is not None:
        chosen = picker.get_activatable_widget().get_path()
        if chosen:
            directory = str(chosen.parent)
        else:
            directory = ''
        self._write_model_path(directory)
|
||||
|
||||
def _apply_sensitivity_state(self) -> None:
|
||||
if self._preset_model_picker is None:
|
||||
return
|
||||
has_local = bool(self.plugin.config['model_path'])
|
||||
entry_text = (self._custom_model_id_entry.entry.get_text().strip()
|
||||
if self._custom_model_id_entry else '')
|
||||
has_entry = bool(entry_text)
|
||||
self._custom_model_id_entry.set_sensitive(not has_local)
|
||||
self._preset_model_picker.set_sensitive(not has_local and not has_entry)
|
||||
|
||||
def _update_model_status(self) -> None:
|
||||
if self._main_model_row is None:
|
||||
return
|
||||
entry_text = (self._custom_model_id_entry.entry.get_text().strip()
|
||||
if self._custom_model_id_entry else '')
|
||||
|
||||
if self.plugin.config['model_path']:
|
||||
path = Path(self.plugin.config['model_path'])
|
||||
summary = _('Local: {}').format(path.name or str(path))
|
||||
description = _('Loading model files from {}').format(path)
|
||||
if not (path / 'config.json').exists():
|
||||
description += '\n' + _(
|
||||
'config.json not found in this directory — onnx-asr will'
|
||||
' fall back to Model preset or Custom Model ID for the'
|
||||
' architecture.')
|
||||
elif entry_text:
|
||||
summary = _('Custom: {}').format(entry_text)
|
||||
description = _('Using custom model: {}').format(entry_text)
|
||||
else:
|
||||
log.debug('Could not create model %s', model)
|
||||
preset_key = (self._preset_model_picker._dropdown.get_selected_key()
|
||||
if self._preset_model_picker else '')
|
||||
summary = preset_key or _('(none)')
|
||||
description = (_('Using preset: {}').format(preset_key)
|
||||
if preset_key else '')
|
||||
|
||||
def on_set_model(self, model: Any, data: str = 'model') -> None:
|
||||
if isinstance(model, str):
|
||||
model.strip()
|
||||
|
||||
self.plugin.config['model'] = model
|
||||
log.debug('Created model %s with config %s', model, self.plugin.config.data[model])
|
||||
|
||||
def check_available_moduls(self):
|
||||
def is_module_available(module: str) -> bool:
|
||||
try:
|
||||
__import__(module)
|
||||
return True
|
||||
except ModuleNotFoundError:
|
||||
log.debug('Could not find module %s', module)
|
||||
return False
|
||||
except ImportError as ex:
|
||||
log.debug(str(ex))
|
||||
return False
|
||||
|
||||
for model in SUPPORTED_MODELS:
|
||||
available = True
|
||||
for modul in SUPPORTED_MODELS[model].required_moduls:
|
||||
if not is_module_available(modul):
|
||||
available = False
|
||||
continue
|
||||
if available:
|
||||
self._available_models[model] = SUPPORTED_MODELS[model]
|
||||
if SUPPORTED_MODELS[model].config is not None:
|
||||
log.debug('created config for model = %s: %s', model, self._available_models[model])
|
||||
log.debug('plugin config for model = %s', self.plugin.config[model])
|
||||
self.plugin.config.data[model].instance = None
|
||||
self._available_models[model].config = self.plugin.config[model]
|
||||
self.create_model(model)
|
||||
|
||||
self.on_set_model(self._plugin.config['model'])
|
||||
|
||||
log.debug('models = %s', self._available_models)
|
||||
self._main_model_row._label.set_text(summary)
|
||||
if self._status_group is not None:
|
||||
self._status_group.set_description(description)
|
||||
|
||||
|
||||
class PreferenceBox(SettingsBox):
|
||||
def __init__(self, settings: list[Setting]) -> None:
|
||||
SettingsBox.__init__(self, None)
|
||||
self.get_style_context().add_class('border')
|
||||
self.set_selection_mode(Gtk.SelectionMode.NONE)
|
||||
self.set_vexpand(False)
|
||||
self.set_valign(Gtk.Align.END)
|
||||
|
||||
for setting in settings:
|
||||
self.add_setting(setting)
|
||||
self.update_states()
|
||||
|
||||
|
||||
class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
|
||||
class STTVoiceMessagesConfigDialog(SettingsDialog):
|
||||
def __init__(self, config: Configuration, parent: Gtk.Window) -> None:
|
||||
Gtk.ApplicationWindow.__init__(self)
|
||||
|
||||
self.set_application(app.app)
|
||||
self.set_position(Gtk.WindowPosition.CENTER)
|
||||
self.set_show_menubar(False)
|
||||
self.set_name('PreferencesWindow')
|
||||
self.set_default_size(900, 650)
|
||||
self.set_resizable(True)
|
||||
self.set_title(_('STT Voice Messages - Preferences'))
|
||||
|
||||
ui_path = Path(__file__).parent
|
||||
self._ui = get_builder(str(ui_path.resolve() / 'config_dialog.ui'))
|
||||
|
||||
self._prefs: dict[str, PreferenceBox] = {}
|
||||
prefs: list[tuple[str, type[PreferenceBox]]] = [
|
||||
('stt_behaviour', self.STTBehaviour),
|
||||
('models', self.Models),
|
||||
]
|
||||
|
||||
if 'model_openaiwhisper' in config.available_models:
|
||||
prefs.append(('openaiwhisper_general', self.OpenAIWhisperGeneral))
|
||||
else:
|
||||
self._ui.stack.remove(getattr(self._ui, 'openai-whisper'))
|
||||
|
||||
if 'model_faster-whisper' in config.available_models:
|
||||
prefs.append(('fasterwhisper_general', self.FasterWhisperGeneral))
|
||||
else:
|
||||
self._ui.stack.remove(getattr(self._ui, 'faster-whisper'))
|
||||
|
||||
side_bar_switcher = SideBarSwitcher()
|
||||
side_bar_switcher.set_stack(self._ui.stack)
|
||||
self._ui.grid.attach(side_bar_switcher, 0, 0, 1, 1)
|
||||
self.add(self._ui.grid)
|
||||
|
||||
self.config = config
|
||||
self.plugin = self.config.plugin
|
||||
self._add_prefs(prefs)
|
||||
if not config.is_available:
|
||||
return
|
||||
|
||||
self.show_all()
|
||||
rows = [
|
||||
Setting(SettingKind.SWITCH,
|
||||
_('Auto Transcribe'),
|
||||
SettingType.VALUE,
|
||||
value=self.plugin.config['auto_transcribe'],
|
||||
data='auto_transcribe',
|
||||
callback=config.on_setting,
|
||||
desc=_('Transcribe messages as they appear')),
|
||||
Setting(SettingKind.SUBPAGE,
|
||||
_('Model'),
|
||||
SettingType.VALUE,
|
||||
value=None,
|
||||
name='main_model',
|
||||
props={'subpage': 'sttvm-model'}),
|
||||
]
|
||||
|
||||
def _add_prefs(self, prefs: list[tuple[str, type[PreferenceBox]]]):
|
||||
for ui_name, klass in prefs:
|
||||
pref_box = getattr(self._ui, ui_name)
|
||||
pref = klass(self) # pyright: ignore
|
||||
log.debug('ui_name = %s, klass = %s, pref_box = %s', ui_name, klass, pref_box)
|
||||
pref_box.add(pref)
|
||||
self._prefs[ui_name] = pref
|
||||
SettingsDialog.__init__(
|
||||
self,
|
||||
parent,
|
||||
_('STT Voice Messages'),
|
||||
Gtk.DialogFlags.MODAL,
|
||||
rows,
|
||||
'',
|
||||
)
|
||||
|
||||
config._main_model_row = self.get_setting('main_model')
|
||||
|
||||
use_custom = self.plugin.config['model_id'] not in config._model_data
|
||||
|
||||
|
||||
############################################################################
|
||||
# General Settings
|
||||
############################################################################
|
||||
class STTBehaviour(PreferenceBox):
|
||||
def __init__(self, config_dialog: STTVoiceMessagesConfigDialog) -> None:
|
||||
settings = [
|
||||
Setting(SettingKind.SWITCH,
|
||||
_('Auto Transcribe'),
|
||||
SettingType.VALUE,
|
||||
value=config_dialog.plugin.config['auto_transcribe'],
|
||||
data='auto_transcribe',
|
||||
callback=config_dialog.config.on_setting)
|
||||
]
|
||||
subpage_rows: list[Setting] = [
|
||||
Setting(SettingKind.DROPDOWN,
|
||||
_('Model'),
|
||||
SettingType.VALUE,
|
||||
value=self.plugin.config['model_id'],
|
||||
name='preset_model',
|
||||
callback=config.on_preset_changed,
|
||||
props={'data': config._model_data}),
|
||||
Setting(SettingKind.ENTRY,
|
||||
_('Custom Model'),
|
||||
SettingType.VALUE,
|
||||
value=self.plugin.config['model_id'] if use_custom else '',
|
||||
name='custom_model',
|
||||
callback=config.on_custom_model_id_changed,
|
||||
desc=_('Custom HF model path or model ID')),
|
||||
Setting(SettingKind.FILECHOOSER,
|
||||
_('Local File'),
|
||||
SettingType.VALUE,
|
||||
value='',
|
||||
name='local_model_file',
|
||||
callback=config.on_model_file_picked,
|
||||
desc=_('Model ID is taken from config.json if not set'),
|
||||
props={'filefilters': [
|
||||
Filter(_('ONNX model'), suffixes=['onnx'], default=True),
|
||||
]}),
|
||||
]
|
||||
|
||||
PreferenceBox.__init__(self, settings)
|
||||
controls_group = GajimPreferencesGroup('model_controls')
|
||||
for s in subpage_rows:
|
||||
controls_group.add_setting(s)
|
||||
|
||||
class Models(PreferenceBox):
|
||||
def __init__(self, config_dialog: STTVoiceMessagesConfigDialog) -> None:
|
||||
models: list[tuple[str, str]] = []
|
||||
for key, value in config_dialog.config.available_models.items():
|
||||
models.append(
|
||||
(key, str(value.name))
|
||||
)
|
||||
status_group = Adw.PreferencesGroup()
|
||||
|
||||
settings = [
|
||||
Setting(SettingKind.COMBO,
|
||||
_('Speech To Text Model'),
|
||||
SettingType.VALUE,
|
||||
value=config_dialog.plugin.config['model'],
|
||||
data='model',
|
||||
callback=config_dialog.config.on_set_model,
|
||||
props={'combo_items': models},
|
||||
desc=_('Choose Model to use')),
|
||||
]
|
||||
pref_page = Adw.PreferencesPage()
|
||||
pref_page.add(controls_group)
|
||||
pref_page.add(status_group)
|
||||
|
||||
PreferenceBox.__init__(self, settings)
|
||||
toolbar = Adw.ToolbarView(content=pref_page)
|
||||
toolbar.add_top_bar(Adw.HeaderBar())
|
||||
|
||||
############################################################################
|
||||
# OpenAI Whisper Settings
|
||||
############################################################################
|
||||
class OpenAIWhisperGeneral(PreferenceBox):
|
||||
def __init__(self, config_dialog: STTVoiceMessagesConfigDialog) -> None:
|
||||
page = Adw.NavigationPage(
|
||||
tag='sttvm-model', title=_('Model'), child=toolbar)
|
||||
self._nav.add(page)
|
||||
|
||||
self._model = 'model_openaiwhisper'
|
||||
self._config_dialog = config_dialog
|
||||
config._preset_model_picker = controls_group.get_setting('preset_model')
|
||||
config._custom_model_id_entry = controls_group.get_setting('custom_model')
|
||||
config._local_model_file_picker = controls_group.get_setting(
|
||||
'local_model_file')
|
||||
config._status_group = status_group
|
||||
|
||||
settings = [
|
||||
Setting(SettingKind.POPOVER,
|
||||
_('Language Model Size'),
|
||||
SettingType.VALUE,
|
||||
value=config_dialog.config.available_models[self._model].config.model_size,
|
||||
data='model_size',
|
||||
callback=self._set_config,
|
||||
props={'entries': whisper.available_models()}),
|
||||
config._custom_model_id_entry.entry.set_placeholder_text(
|
||||
_('onnx-community/whisper-large-v3-turbo'))
|
||||
|
||||
Setting(SettingKind.SWITCH,
|
||||
_('Translate'),
|
||||
SettingType.VALUE,
|
||||
value=config_dialog.config.available_models[self._model].config.translate_to_english,
|
||||
data='translate_to_english',
|
||||
callback=self._set_config)
|
||||
]
|
||||
button = config._local_model_file_picker.get_activatable_widget()
|
||||
button._label_text = _('.oonx')
|
||||
button.reset()
|
||||
|
||||
PreferenceBox.__init__(self, settings)
|
||||
if self.plugin.config['model_path']:
|
||||
onnx_in_dir = next(iter(Path(self.plugin.config['model_path']).glob('*.onnx')),
|
||||
None)
|
||||
if onnx_in_dir is not None:
|
||||
button.set_path(onnx_in_dir)
|
||||
|
||||
def _set_config(self, value: Any, data: Any):
|
||||
self._config_dialog.config.on_config_model(self._model, value, data)
|
||||
config._update_model_status()
|
||||
config._apply_sensitivity_state()
|
||||
|
||||
############################################################################
|
||||
# Faster Whisper Settings
|
||||
############################################################################
|
||||
class FasterWhisperGeneral(PreferenceBox):
|
||||
def __init__(self,
|
||||
config_dialog: STTVoiceMessagesConfigDialog) -> None:
|
||||
self._model = 'model_faster-whisper'
|
||||
self._config_dialog = config_dialog
|
||||
|
||||
settings = [
|
||||
Setting(SettingKind.POPOVER,
|
||||
_('Language Model Size'),
|
||||
SettingType.VALUE,
|
||||
value=config_dialog.config.available_models[
|
||||
self._model].config.model_size,
|
||||
data='model_size',
|
||||
callback=self._set_config,
|
||||
props={'entries': fwhisper.available_models()}),
|
||||
|
||||
Setting(SettingKind.SWITCH,
|
||||
_('Translate'),
|
||||
SettingType.VALUE,
|
||||
value=config_dialog.config.available_models[
|
||||
self._model].config.translate_to_english,
|
||||
data='translate_to_english',
|
||||
callback=self._set_config)
|
||||
]
|
||||
|
||||
PreferenceBox.__init__(self, settings)
|
||||
|
||||
def _set_config(self, value: Any, data: Any):
|
||||
self._config_dialog.config.on_config_model(self._model, value,
|
||||
data)
|
||||
def _cleanup(self) -> None:
    """Persist the chosen model path and drop the config's widget references.

    The file chooser's selection is synced into the configuration first,
    while the widget reference is still set; the widget references held by
    the configuration object are then reset to ``None`` before the base
    class cleanup runs.
    """
    config = self.config
    config.sync_model_path_from_widget()

    widget_refs = (
        '_main_model_row',
        '_preset_model_picker',
        '_custom_model_id_entry',
        '_local_model_file_picker',
        '_status_group',
    )
    for attr_name in widget_refs:
        setattr(config, attr_name, None)

    SettingsDialog._cleanup(self)
|
||||
|
||||
@@ -1,349 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- Generated with glade 3.40.0 -->
|
||||
<interface>
|
||||
<requires lib="gtk+" version="3.20"/>
|
||||
<!-- n-columns=3 n-rows=3 -->
|
||||
<object class="GtkGrid" id="grid">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<child>
|
||||
<object class="GtkStack" id="stack">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="hexpand">True</property>
|
||||
<child>
|
||||
<object class="GtkScrolledWindow">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">True</property>
|
||||
<property name="hscrollbar-policy">never</property>
|
||||
<property name="shadow-type">in</property>
|
||||
<property name="overlay-scrolling">False</property>
|
||||
<child>
|
||||
<object class="GtkViewport">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<child>
|
||||
<object class="GtkBox">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="orientation">vertical</property>
|
||||
<property name="spacing">24</property>
|
||||
<child>
|
||||
<!-- n-columns=1 n-rows=1 -->
|
||||
<object class="GtkGrid" id="stt_behaviour">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="orientation">vertical</property>
|
||||
<property name="row-spacing">12</property>
|
||||
<child>
|
||||
<object class="GtkLabel">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="label" translatable="yes">Behaviour of STT Voice Messages</property>
|
||||
<property name="xalign">0</property>
|
||||
<style>
|
||||
<class name="bold"/>
|
||||
</style>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="left-attach">0</property>
|
||||
<property name="top-attach">0</property>
|
||||
</packing>
|
||||
</child>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="expand">False</property>
|
||||
<property name="fill">True</property>
|
||||
<property name="position">0</property>
|
||||
</packing>
|
||||
</child>
|
||||
<child>
|
||||
<!-- n-columns=1 n-rows=1 -->
|
||||
<object class="GtkGrid" id="models">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="orientation">vertical</property>
|
||||
<property name="row-spacing">12</property>
|
||||
<child>
|
||||
<object class="GtkLabel">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="label" translatable="yes">General Model Configuration</property>
|
||||
<property name="xalign">0</property>
|
||||
<style>
|
||||
<class name="bold"/>
|
||||
</style>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="left-attach">0</property>
|
||||
<property name="top-attach">0</property>
|
||||
</packing>
|
||||
</child>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="expand">False</property>
|
||||
<property name="fill">True</property>
|
||||
<property name="position">1</property>
|
||||
</packing>
|
||||
</child>
|
||||
<child>
|
||||
<!-- n-columns=1 n-rows=1 -->
|
||||
<object class="GtkGrid" id="file_preview">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="orientation">vertical</property>
|
||||
<property name="row-spacing">12</property>
|
||||
<child>
|
||||
<object class="GtkLabel">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="label" translatable="yes">Preview UI</property>
|
||||
<property name="xalign">0</property>
|
||||
<style>
|
||||
<class name="bold"/>
|
||||
</style>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="left-attach">0</property>
|
||||
<property name="top-attach">0</property>
|
||||
</packing>
|
||||
</child>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="expand">False</property>
|
||||
<property name="fill">True</property>
|
||||
<property name="position">2</property>
|
||||
</packing>
|
||||
</child>
|
||||
</object>
|
||||
</child>
|
||||
</object>
|
||||
</child>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="name">general</property>
|
||||
<property name="title" translatable="yes">General</property>
|
||||
<property name="icon-name">computer-symbolic</property>
|
||||
</packing>
|
||||
</child>
|
||||
<child>
|
||||
<object class="GtkScrolledWindow" id="openai-whisper">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">True</property>
|
||||
<property name="hscrollbar-policy">never</property>
|
||||
<property name="shadow-type">in</property>
|
||||
<property name="overlay-scrolling">False</property>
|
||||
<child>
|
||||
<object class="GtkViewport" id="openai-whisper-viewport">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<child>
|
||||
<object class="GtkBox">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="orientation">vertical</property>
|
||||
<property name="spacing">24</property>
|
||||
<child>
|
||||
<!-- n-columns=3 n-rows=3 -->
|
||||
<object class="GtkGrid" id="openaiwhisper_general">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="orientation">vertical</property>
|
||||
<property name="row-spacing">12</property>
|
||||
<child>
|
||||
<object class="GtkLabel">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="label" translatable="yes">General</property>
|
||||
<property name="xalign">0</property>
|
||||
<style>
|
||||
<class name="bold"/>
|
||||
</style>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="left-attach">0</property>
|
||||
<property name="top-attach">0</property>
|
||||
</packing>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="expand">False</property>
|
||||
<property name="fill">True</property>
|
||||
<property name="position">0</property>
|
||||
</packing>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
</object>
|
||||
</child>
|
||||
</object>
|
||||
</child>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="name">openai-whisper</property>
|
||||
<property name="title" translatable="yes">OpenAI Whisper</property>
|
||||
<property name="position">1</property>
|
||||
</packing>
|
||||
</child>
|
||||
<child>
|
||||
<object class="GtkScrolledWindow" id="faster-whisper">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">True</property>
|
||||
<property name="hscrollbar-policy">never</property>
|
||||
<property name="shadow-type">in</property>
|
||||
<property name="overlay-scrolling">False</property>
|
||||
<child>
|
||||
<object class="GtkViewport">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<child>
|
||||
<object class="GtkBox">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="orientation">vertical</property>
|
||||
<property name="spacing">24</property>
|
||||
<child>
|
||||
<!-- n-columns=3 n-rows=3 -->
|
||||
<object class="GtkGrid" id="fasterwhisper_general">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="orientation">vertical</property>
|
||||
<property name="row-spacing">12</property>
|
||||
<child>
|
||||
<object class="GtkLabel">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="label" translatable="yes">General</property>
|
||||
<property name="xalign">0</property>
|
||||
<style>
|
||||
<class name="bold"/>
|
||||
</style>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="left-attach">0</property>
|
||||
<property name="top-attach">0</property>
|
||||
</packing>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="expand">False</property>
|
||||
<property name="fill">True</property>
|
||||
<property name="position">0</property>
|
||||
</packing>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
</object>
|
||||
</child>
|
||||
</object>
|
||||
</child>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="name">faster-whisper</property>
|
||||
<property name="title" translatable="yes">Faster Whisper</property>
|
||||
<property name="position">2</property>
|
||||
</packing>
|
||||
</child>
|
||||
<style>
|
||||
<class name="settings-stack"/>
|
||||
</style>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="left-attach">1</property>
|
||||
<property name="top-attach">0</property>
|
||||
</packing>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
</object>
|
||||
</interface>
|
||||
@@ -13,9 +13,12 @@
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import logging
|
||||
from __future__ import annotations
|
||||
|
||||
from gi.repository import Gtk
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from gi.repository import Gtk, Adw
|
||||
|
||||
from gajim.plugins.gajimplugin import GajimPluginConfig
|
||||
from gajim.plugins.plugins_i18n import _
|
||||
@@ -26,49 +29,62 @@ log = logging.getLogger('gajim.p.stt_voice_messages_sttbox')
|
||||
|
||||
class STTBox(Gtk.Box):
|
||||
def __init__(self,
|
||||
preview_audio_widget: Gtk.Box,
|
||||
config: GajimPluginConfig,
|
||||
audio_file: str,
|
||||
audio_file: Path,
|
||||
) -> None:
|
||||
|
||||
Gtk.Box.__init__(self, orientation=Gtk.Orientation.VERTICAL, spacing=12)
|
||||
Gtk.Box.__init__(self, orientation=Gtk.Orientation.HORIZONTAL, spacing=6)
|
||||
|
||||
self._config = config
|
||||
self._preview_audio = preview_audio_widget
|
||||
self._model = None
|
||||
self._audio_file = audio_file
|
||||
self._text = ''
|
||||
|
||||
self._transcribe_button = Gtk.Button(label=_('Transcribe'))
|
||||
self._transcribe_button = Gtk.Button.new_from_icon_name("lucide-captions-symbolic")
|
||||
self._transcribe_button.set_tooltip_text(_('Transcribe voice message'))
|
||||
|
||||
self._spinner = Adw.Spinner(valign=Gtk.Align.START, visible=False)
|
||||
|
||||
self._transcription_label = Gtk.Label(
|
||||
label=_('Nothing transcribed yet'))
|
||||
self._transcription_label.set_max_width_chars(40)
|
||||
self._transcription_label.set_line_wrap(True)
|
||||
|
||||
self.add(self._transcribe_button)
|
||||
self.add(self._transcription_label)
|
||||
self._transcription_label.set_wrap(True)
|
||||
|
||||
self.append(self._spinner)
|
||||
self.append(self._transcription_label)
|
||||
self._transcribe_button.connect('clicked', self._on_transcribe_clicked)
|
||||
self._result = helper.Results('')
|
||||
|
||||
self._transcribe_button.connect('clicked', self._on_transcribe_clicked)
|
||||
|
||||
self.show_all()
|
||||
@property
|
||||
def button(self) -> Gtk.Button:
|
||||
return self._transcribe_button
|
||||
|
||||
def _on_transcribe_clicked(self, _button: Gtk.Button) -> None:
|
||||
log.debug('config.data = %s', self._config.data)
|
||||
model_name = self._config.data['model']
|
||||
model = self._config.data[model_name].instance
|
||||
if model is None:
|
||||
log.debug('config._instance = %s', self._config._instance)
|
||||
self._model = self._config._instance
|
||||
if self._model is None:
|
||||
return
|
||||
|
||||
self._model = model
|
||||
if self._model.is_loaded:
|
||||
text = _('Transcribing…')
|
||||
elif self._model.will_download:
|
||||
text = _('Downloading ') + self._model.model_id
|
||||
else:
|
||||
text = _('Loading model…')
|
||||
self._transcription_label.set_text(text)
|
||||
self._spinner.set_visible(True)
|
||||
self._task = helper.BackgroundTask(
|
||||
self._model.load, self._on_load_done)
|
||||
self._task.start()
|
||||
|
||||
transcription_task = helper.BackgroundTask(
|
||||
self._model.transcribe(self._result, self._audio_file),
|
||||
self._show_result
|
||||
def _on_load_done(self):
|
||||
self._transcription_label.set_text(_('Transcribing…'))
|
||||
self._task = helper.BackgroundTask(
|
||||
lambda: self._model.recognize(
|
||||
self._result, helper.load_audio(self._audio_file)),
|
||||
self._show_result,
|
||||
)
|
||||
transcription_task.start()
|
||||
self._task.start()
|
||||
|
||||
def _show_result(self):
|
||||
assert self._model is not None
|
||||
@@ -77,3 +93,4 @@ class STTBox(Gtk.Box):
|
||||
self._transcription_label.set_text(self._text.strip())
|
||||
else:
|
||||
self._transcription_label.set_text(_('_Have not heard any word!_'))
|
||||
self._spinner.set_visible(False)
|
||||
|
||||
@@ -13,16 +13,53 @@
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import logging
|
||||
import typing
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import gi
|
||||
import numpy as np
|
||||
from gi.repository import Gio, GObject
|
||||
|
||||
try:
|
||||
gi.require_version('Gst', '1.0')
|
||||
from gi.repository import Gst
|
||||
except Exception:
|
||||
if typing.TYPE_CHECKING:
|
||||
from gi.repository import Gst
|
||||
|
||||
log = logging.getLogger('gajim.p.sttvm_helper')
|
||||
|
||||
|
||||
@dataclass
class Results:
    """Mutable container for the text produced by a transcription run."""

    # Transcribed text; written by the recognizer, read by the UI afterwards.
    text: str
|
||||
|
||||
|
||||
def load_audio(path: Path, sample_rate: int = 16000) -> np.ndarray:
    """Decode an audio file into a mono float32 sample array.

    The file is pushed through a GStreamer pipeline that decodes it,
    converts it to one channel of F32LE samples and resamples it to
    ``sample_rate``. Samples are pulled from an appsink until it stops
    delivering (end of stream or a 10 second timeout).

    :param path: Location of the audio file to decode.
    :param sample_rate: Target sample rate in Hz.
    :returns: One-dimensional ``float32`` array with all decoded samples.
    :raises RuntimeError: If a buffer cannot be mapped for reading or if
        no samples at all could be pulled from the pipeline.
    """
    Gst.init(None)
    pipeline = Gst.parse_launch(
        'filesrc name=src ! decodebin ! audioconvert ! audioresample ! '
        f'audio/x-raw,format=F32LE,rate={sample_rate},channels=1 ! '
        'appsink name=sink sync=false'
    )
    pipeline.get_by_name('src').set_property('location', str(path))
    sink = pipeline.get_by_name('sink')
    chunks: list[np.ndarray] = []

    pipeline.set_state(Gst.State.PLAYING)
    try:
        while (sample := sink.emit('try-pull-sample',
                                   10 * Gst.SECOND)) is not None:
            buf = sample.get_buffer()
            mapped, info = buf.map(Gst.MapFlags.READ)
            if not mapped:
                # Dropping the buffer silently would leave a gap in the
                # audio and produce a misleading transcription.
                raise RuntimeError(f'Could not map audio buffer: {path}')
            try:
                # bytes() copies the data so it outlives the unmap below
                chunks.append(
                    np.frombuffer(bytes(info.data), dtype=np.float32))
            finally:
                buf.unmap(info)
    finally:
        # Always release the pipeline, even when decoding failed midway
        pipeline.set_state(Gst.State.NULL)

    if not chunks:
        raise RuntimeError(f'Could not decode audio: {path}')
    return np.concatenate(chunks)
|
||||
|
||||
|
||||
'''
|
||||
https://discourse.gnome.org/t/gtk-threading-problem-with-glib-idle-add/13597/5
|
||||
@@ -57,6 +94,7 @@ class BackgroundTask(GObject.Object):
|
||||
retval = self.function()
|
||||
task.return_value(retval)
|
||||
except Exception as e:
|
||||
log.exception('Background task failed')
|
||||
task.return_value(e)
|
||||
|
||||
def finish(self):
|
||||
|
||||
@@ -18,11 +18,6 @@ from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
|
||||
class OpenAIWhisperSettings:
|
||||
model_size: str = field(default='tiny', init=True)
|
||||
translate_to_english: bool = field(default=False, init=True)
|
||||
|
||||
@dataclass
|
||||
class FasterWhisperSettings:
|
||||
model_size: str = field(default='tiny', init=True)
|
||||
translate_to_english: bool = field(default=False, init=True)
|
||||
class OnnxAsrSettings:
|
||||
model_id: str = field(default='nemo-parakeet-tdt-0.6b-v3', init=True)
|
||||
model_path: str = ''
|
||||
|
||||
@@ -14,16 +14,26 @@
|
||||
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ..helper import Results
|
||||
|
||||
|
||||
class Model(ABC):
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def transcribe(self, result: Results, audio_file: Path) -> None:
|
||||
def is_loaded(self) -> bool:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def load(self) -> None:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def recognize(self, result: Results, audio: np.ndarray) -> None:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
|
||||
132
stt_voice_messages/models/stt.py
Normal file
132
stt_voice_messages/models/stt.py
Normal file
@@ -0,0 +1,132 @@
|
||||
# This file is part of Gajim.
|
||||
#
|
||||
# Gajim is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Gajim is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import logging
|
||||
import pickle
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from gi.repository import GLib
|
||||
|
||||
from ..helper import Results
|
||||
from .model_settings import OnnxAsrSettings
|
||||
from .model_template import Model
|
||||
|
||||
log = logging.getLogger('gajim.p.sttvm_onnx_asr')
|
||||
|
||||
|
||||
_IDLE_UNLOAD_SECONDS = 300
|
||||
|
||||
class OnnxAsrModel(Model):
    """Speech-to-text model that is driven through a worker subprocess.

    Commands are pickled to the worker's stdin and one pickled reply per
    command is read back from its stdout. The worker is shut down by
    ``unload_now()``, which is also scheduled to run after
    ``_IDLE_UNLOAD_SECONDS`` without a ``load()``/``recognize()`` call.

    NOTE(review): access to the worker pipes is not serialised. If
    ``load()``/``recognize()`` run on a background thread while
    ``unload_now()`` is triggered from the main loop, the two can race;
    confirm against the callers.
    """

    def __init__(self) -> None:
        self._proc = None
        self._loaded = False
        self._config = OnnxAsrSettings()
        self._unload_source = None

    @property
    def is_loaded(self) -> bool:
        """Whether a model was loaded into a worker that is still running."""
        # The flag alone is not enough: the worker may have died since
        # the model was loaded.
        return self._loaded and self._worker_alive()

    @property
    def will_download(self) -> bool:
        """Whether ``config.json`` of the model repository is not cached.

        Always ``False`` when the model is already loaded, when an explicit
        ``model_path`` is configured, or when the resolved repository name
        contains no ``/``.
        """
        if self.is_loaded or self._config.model_path:
            return False
        from huggingface_hub import try_to_load_from_cache
        from onnx_asr.resolver import model_repos
        repo = model_repos.get(self._config.model_id, self._config.model_id)
        if '/' not in repo:
            return False
        return not isinstance(try_to_load_from_cache(repo, 'config.json'), str)

    def load(self) -> None:
        """Load the configured model in the worker unless already loaded.

        :raises RuntimeError: If the worker dies or reports an error.
        """
        if self.is_loaded:
            self._reschedule_unload()
            return
        log.debug('Loading model %s in worker', self._config.model_id)
        self._send({
            'op': 'load',
            'model_id': self._config.model_id,
            'model_path': self._config.model_path,
        })
        self._loaded = True
        self._reschedule_unload()

    def recognize(self, result: Results, audio: np.ndarray) -> None:
        """Transcribe ``audio`` and store the text in ``result.text``.

        The model is loaded first if necessary.

        :raises RuntimeError: If the worker dies or reports an error.
        """
        self.load()
        response = self._send({'op': 'recognize', 'audio': audio})
        result.text = response['text']
        self._reschedule_unload()

    def set_config(self, config: OnnxAsrSettings) -> None:
        """Adopt a copy of ``config``; unload first if the model changed."""
        if (config.model_id != self._config.model_id
                or config.model_path != self._config.model_path):
            self.unload_now()
        self._config = OnnxAsrSettings(
            model_id=config.model_id, model_path=config.model_path)

    def unload_now(self) -> None:
        """Cancel the pending idle unload and stop the worker, if any."""
        if self._unload_source is not None:
            GLib.source_remove(self._unload_source)
            self._unload_source = None
        proc, self._proc = self._proc, None
        self._loaded = False
        if proc is not None:
            log.debug('Terminating STT worker subprocess')
            self._reap(proc)

    def _worker_alive(self) -> bool:
        """Return True if a worker process exists and has not exited."""
        return self._proc is not None and self._proc.poll() is None

    @staticmethod
    def _reap(proc) -> None:
        """Close the worker's pipes and wait for it, killing it if needed."""
        try:
            # Closing stdin makes the worker's read loop see EOF and return
            proc.stdin.close()
        except OSError:
            # close() flushes; that fails if the worker is already gone,
            # which is fine because we are tearing it down anyway.
            pass
        try:
            proc.wait(timeout=2)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
        finally:
            # Release the read end too so no pipe is left open
            proc.stdout.close()

    def _ensure_proc(self) -> None:
        """Start a worker subprocess unless a running one exists."""
        if self._worker_alive():
            return
        if self._proc is not None:
            # Previous worker exited on its own: collect it, free its pipes
            self._reap(self._proc)
        log.debug('Starting STT worker subprocess')
        self._proc = subprocess.Popen(
            [sys.executable, str(Path(__file__).parent / 'stt_worker.py')],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
        )
        self._loaded = False

    def _send(self, cmd: dict) -> dict:
        """Send one command to the worker and return its reply.

        :raises RuntimeError: If the worker pipe breaks or the reply has a
            falsy ``ok`` field.
        """
        self._ensure_proc()
        proc = self._proc
        try:
            pickle.dump(cmd, proc.stdin)
            proc.stdin.flush()
            # The reply comes from the subprocess started by this class,
            # so unpickling it is not exposed to external input.
            response = pickle.load(proc.stdout)
        except (EOFError, OSError) as e:
            # EOF on read and a broken pipe on write both mean the worker
            # is gone; forget it so the next call starts a fresh one.
            if self._proc is proc:
                self._proc = None
            self._loaded = False
            self._reap(proc)
            raise RuntimeError('Worker subprocess exited unexpectedly') from e
        if not response.get('ok'):
            worker_traceback = response.get('traceback')
            if worker_traceback:
                log.debug('STT worker traceback:\n%s', worker_traceback)
            raise RuntimeError(response.get('error', 'unknown worker error'))
        return response

    def _reschedule_unload(self) -> None:
        """Restart the idle timer that unloads the model."""
        if self._unload_source is not None:
            GLib.source_remove(self._unload_source)
        self._unload_source = GLib.timeout_add_seconds(
            _IDLE_UNLOAD_SECONDS, self._on_idle_unload)

    def _on_idle_unload(self) -> bool:
        """Timer callback: unload the model and remove the timer."""
        # Clear the id first so unload_now() does not try to remove the
        # source that is currently being dispatched.
        self._unload_source = None
        log.debug('Idle unload after %ds', _IDLE_UNLOAD_SECONDS)
        self.unload_now()
        return GLib.SOURCE_REMOVE
|
||||
54
stt_voice_messages/models/stt_worker.py
Normal file
54
stt_voice_messages/models/stt_worker.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# This file is part of Gajim.
|
||||
#
|
||||
# Gajim is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Gajim is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import pickle
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
|
||||
def _respond(response: dict) -> None:
    """Pickle ``response`` onto binary stdout and flush it right away."""
    stream = sys.stdout.buffer
    pickle.dump(response, stream)
    stream.flush()
|
||||
|
||||
|
||||
def main() -> None:
    """Serve pickled commands from stdin until the stream reaches EOF.

    Every command is a dict with an ``op`` key:

    * ``load``: load the model given by ``model_id`` and the optional
      ``model_path`` through ``onnx_asr.load_model``.
    * ``recognize``: run the loaded model on ``audio``.

    Each command is answered with exactly one pickled dict. It carries
    ``ok`` and, depending on the outcome, ``text`` or ``error`` plus
    ``traceback``.
    """
    model = None
    while True:
        try:
            # NOTE(review): pickle is only safe while stdin is fed by the
            # process that spawned this worker; never expose it further.
            cmd = pickle.load(sys.stdin.buffer)
        except EOFError:
            # Parent closed the pipe: normal shutdown
            return
        try:
            op = cmd['op']
            if op == 'load':
                # Imported lazily so startup stays cheap and a missing
                # package is reported as a regular error reply.
                import onnx_asr
                model = onnx_asr.load_model(
                    cmd['model_id'], cmd.get('model_path') or None)
                _respond({'ok': True})
            elif op == 'recognize':
                if model is None:
                    # Report this explicitly instead of failing with an
                    # opaque AttributeError on None.
                    raise RuntimeError('recognize requested before load')
                text = model.recognize(cmd['audio'])
                _respond({'ok': True, 'text': text})
            else:
                _respond({'ok': False, 'error': f'unknown op: {op}'})
        except Exception as e:
            # Top-level boundary: report the failure and keep serving
            _respond({
                'ok': False,
                'error': f'{type(e).__name__}: {e}',
                'traceback': traceback.format_exc(),
            })
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -13,7 +13,7 @@
|
||||
"win32"
|
||||
],
|
||||
"requirements": [
|
||||
"gajim>=1.9.0"
|
||||
"gajim>=2.0.0"
|
||||
],
|
||||
"short_name": "stt_voice_messages",
|
||||
"version": "0.0.1"
|
||||
|
||||
@@ -15,17 +15,24 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
|
||||
from gi.repository import GLib, Gtk
|
||||
|
||||
from gajim.common import app
|
||||
from gajim.plugins import GajimPlugin
|
||||
from gajim.plugins.plugins_i18n import _
|
||||
|
||||
from .gtk.config_dialog import *
|
||||
from .gtk.config_dialog import Configuration, STTVoiceMessagesConfigDialog
|
||||
from .gtk.sttbox import STTBox
|
||||
from .models.model_settings import *
|
||||
from .models.model_settings import OnnxAsrSettings
|
||||
|
||||
log = logging.getLogger('gajim.p.stt_voice_messages')
|
||||
|
||||
_FOCUS_LOSS_UNLOAD_SECONDS = 30
|
||||
|
||||
|
||||
class STTVoiceMessagesPlugin(GajimPlugin):
|
||||
def init(self) -> None:
|
||||
@@ -33,42 +40,64 @@ class STTVoiceMessagesPlugin(GajimPlugin):
|
||||
|
||||
self.config_default_values = {
|
||||
'auto_transcribe': (False, ''),
|
||||
'model': ('model_openaiwhisper', ''),
|
||||
'model_openaiwhisper': (
|
||||
OpenAIWhisperSettings(
|
||||
model_size='tiny',
|
||||
translate_to_english=False),
|
||||
''),
|
||||
'model_faster-whisper': (
|
||||
FasterWhisperSettings(
|
||||
model_size='tiny',
|
||||
translate_to_english=False),
|
||||
'')
|
||||
'model_id': ('nemo-parakeet-tdt-0.6b-v3', ''),
|
||||
'model_path': ('', ''),
|
||||
}
|
||||
|
||||
self._config = Configuration(self)
|
||||
self._config.check_available_moduls()
|
||||
self.config_dialog = partial(STTVoiceMessagesConfigDialog, self._config)
|
||||
|
||||
self.gui_extension_points = {
|
||||
'preview_audio': (self._on_preview_audio_created, None),
|
||||
}
|
||||
|
||||
self._audio_file: str = ''
|
||||
self._preview_audio_widget = None
|
||||
self._stt_box = None
|
||||
self._active_handler_id = 0
|
||||
self._focus_unload_source = None
|
||||
|
||||
def activate(self) -> None:
    """Connect to the main window's ``notify::is-active`` signal once."""
    if app.window is None or self._active_handler_id != 0:
        # No window to watch, or the handler is already connected
        return
    self._active_handler_id = app.window.connect(
        'notify::is-active', self._on_window_active_changed)
|
||||
|
||||
def deactivate(self) -> None:
    """Cancel the focus timer, disconnect the signal and unload the model."""
    pending = self._focus_unload_source
    if pending is not None:
        GLib.source_remove(pending)
        self._focus_unload_source = None

    handler_id = self._active_handler_id
    if handler_id != 0 and app.window is not None:
        app.window.disconnect(handler_id)
        self._active_handler_id = 0

    if self._config.is_available:
        self._config.unload_model()
|
||||
|
||||
def _on_window_active_changed(self,
                              window: Gtk.Window,
                              _pspec: object,
                              ) -> None:
    """Arm or cancel the delayed unload when the window's focus changes."""
    if not window.is_active():
        # Window became inactive: start the countdown unless one is running
        if self._focus_unload_source is None:
            self._focus_unload_source = GLib.timeout_add_seconds(
                _FOCUS_LOSS_UNLOAD_SECONDS, self._on_focus_unload_fired)
        return

    # Window is active again: drop a pending countdown
    if self._focus_unload_source is not None:
        GLib.source_remove(self._focus_unload_source)
        self._focus_unload_source = None
|
||||
|
||||
def _on_focus_unload_fired(self) -> bool:
    """Timeout callback: unload the model and remove the timeout source."""
    # The source is removed by the return value below, so just forget its id
    self._focus_unload_source = None
    config = self._config
    if config.is_available:
        config.unload_model()
    return GLib.SOURCE_REMOVE
|
||||
|
||||
def _on_preview_audio_created(self,
|
||||
preview_audio_widget: Gtk.Box,
|
||||
drawing_box: Gtk.Box,
|
||||
control_box: Gtk.Box,
|
||||
audio_file: Path
|
||||
) -> None:
|
||||
self._preview_audio_widget = preview_audio_widget
|
||||
self._drawing_box = drawing_box;
|
||||
self._control_box = control_box;
|
||||
self._audio_file = audio_file.as_posix()
|
||||
self._create_stt_box()
|
||||
|
||||
def _create_stt_box(self) -> None:
|
||||
assert self._preview_audio_widget is not None
|
||||
self._stt_box = STTBox(self._preview_audio_widget,
|
||||
self.config,
|
||||
self._audio_file)
|
||||
self._preview_audio_widget.pack_end(self._stt_box, False, False, 0)
|
||||
self._stt_box = STTBox(self._config, self._audio_file)
|
||||
self._control_box.append(self._stt_box.button)
|
||||
self._drawing_box.append(self._stt_box)
|
||||
|
||||
Reference in New Issue
Block a user