Make Faster Whisper and OpenAI Whisper work
This commit is contained in:
@@ -15,13 +15,23 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import asdict
|
||||
import logging
|
||||
import typing
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import whisper
|
||||
try:
|
||||
import whisper
|
||||
except ModuleNotFoundError:
|
||||
if typing.TYPE_CHECKING:
|
||||
import whisper
|
||||
|
||||
try:
|
||||
import faster_whisper as fwhisper
|
||||
except ModuleNotFoundError:
|
||||
if typing.TYPE_CHECKING:
|
||||
import faster_whisper as fwhisper
|
||||
|
||||
from gi.repository import Gtk
|
||||
|
||||
from gajim.common import app
|
||||
@@ -33,7 +43,7 @@ from gajim.gtk.sidebar_switcher import SideBarSwitcher
|
||||
from gajim.plugins.helpers import get_builder
|
||||
from gajim.plugins.plugins_i18n import _
|
||||
|
||||
from ..models import openai_whisper
|
||||
from ..models import faster_whisper, openai_whisper
|
||||
from ..models.model_settings import *
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -56,18 +66,10 @@ SUPPORTED_MODELS: dict[str, Model] = {
|
||||
['whisper'],
|
||||
openai_whisper.WhisperModel,
|
||||
OpenAIWhisperSettings),
|
||||
'model_ctranslate2': Model('CTranslate2',
|
||||
['ctranslate2'],
|
||||
None,
|
||||
None),
|
||||
'model_faster-whisper': Model('Fast-Whisper',
|
||||
['faster-whisper'],
|
||||
None,
|
||||
None),
|
||||
'model_distill': Model('Distill',
|
||||
['transformers', 'accelerate', 'datasets[audio]'],
|
||||
None,
|
||||
None)
|
||||
'model_faster-whisper': Model('Faster-Whisper',
|
||||
['faster_whisper'],
|
||||
faster_whisper.FasterWhisperModel,
|
||||
FasterWhisperSettings)
|
||||
}
|
||||
|
||||
|
||||
@@ -78,7 +80,7 @@ class Configuration:
|
||||
self._available_models: dict[str, Model] = {}
|
||||
self.check_available_moduls()
|
||||
|
||||
log.debug('config = %s', self._plugin.config['model_openaiwhisper'])
|
||||
log.debug('config = %s', self._plugin.config)
|
||||
|
||||
@property
|
||||
def plugin(self) -> STTVoiceMessagesPlugin:
|
||||
@@ -106,20 +108,20 @@ class Configuration:
|
||||
|
||||
self._plugin.config.data[model].instance.set_config(self.plugin.config.data[model])
|
||||
|
||||
def on_set_model(self, model: Any) -> None:
|
||||
if isinstance(model, str):
|
||||
model.strip()
|
||||
log.debug('plugin config before:\n %s', self.plugin.config.data)
|
||||
|
||||
def create_model(self, model: Any) -> None:
|
||||
if (self.plugin.config.data[model].instance is None and
|
||||
self._available_models[model].klass is not None):
|
||||
self.plugin.config.data[model].instance = \
|
||||
self._available_models[model].klass()
|
||||
else:
|
||||
return
|
||||
log.debug('Could not create model %s', model)
|
||||
|
||||
def on_set_model(self, model: Any, data: str = 'model') -> None:
|
||||
if isinstance(model, str):
|
||||
model.strip()
|
||||
|
||||
self.plugin.config['model'] = model
|
||||
log.debug('plugin config after:\n %s', self.plugin.config.data)
|
||||
log.debug('Created model %s with config %s', model, self.plugin.config.data[model])
|
||||
|
||||
def check_available_moduls(self):
|
||||
def is_module_available(module: str) -> bool:
|
||||
@@ -146,6 +148,7 @@ class Configuration:
|
||||
log.debug('plugin config for model = %s', self.plugin.config[model])
|
||||
self.plugin.config.data[model].instance = None
|
||||
self._available_models[model].config = self.plugin.config[model]
|
||||
self.create_model(model)
|
||||
|
||||
self.on_set_model(self._plugin.config['model'])
|
||||
|
||||
@@ -192,12 +195,39 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
|
||||
prefs: list[tuple[str, type[PreferenceBox]]] = [
|
||||
('stt_behaviour', self.STTBehaviour),
|
||||
('models', self.Models),
|
||||
('whisper_general', self.OpenAIWhisperGeneral),
|
||||
]
|
||||
self._add_prefs(prefs)
|
||||
|
||||
# TODO: Refactor this
|
||||
if 'model_openaiwhisper' in config.available_models:
|
||||
prefs.append(('openaiwhisper_general', self.OpenAIWhisperGeneral))
|
||||
else:
|
||||
self._disable_pref('openai-whisper-viewport') # does not work yet
|
||||
|
||||
if 'model_faster-whisper' in config.available_models:
|
||||
prefs.append(('fasterwhisper_general', self.FasterWhisperGeneral))
|
||||
else:
|
||||
self._disable_pref('faster-whisper') # does not work yet
|
||||
|
||||
self._add_prefs(prefs)
|
||||
self.show_all()
|
||||
|
||||
def _add_prefs(self, prefs: list[tuple[str, type[PreferenceBox]]]):
    """Create every preference widget and attach it to its UI container.

    For each ``(ui_name, klass)`` pair the container is read as an
    attribute of ``self._ui``, ``klass(self)`` is instantiated and added
    to that container, and the instance is stored in ``self._prefs``
    under ``ui_name``.
    """
    for name, pref_class in prefs:
        container = getattr(self._ui, name)
        widget = pref_class(self)  # pyright: ignore
        log.debug('ui_name = %s, klass = %s, pref_box = %s',
                  name, pref_class, container)
        container.add(widget)
        self._prefs[name] = widget
|
||||
|
||||
def _disable_pref(self, pref: str):
    """Try to disable the settings page whose UI object is named *pref*.

    The widget is read as an attribute of ``self._ui`` and both of its
    focus adjustments are replaced by an adjustment whose value, lower
    and upper bounds are all zero.
    """
    # TODO: This does not yet prevent scrolling to the setting.
    pref_box = getattr(self._ui, pref)
    log.debug('Disable Settings Page for %s', pref_box)
    # Keyword arguments: PyGObject deprecates positional constructor
    # arguments for Gtk.Adjustment.
    adj = Gtk.Adjustment(value=0, lower=0, upper=0)
    pref_box.set_focus_hadjustment(adj)
    pref_box.set_focus_vadjustment(adj)
|
||||
|
||||
|
||||
############################################################################
|
||||
# General Settings
|
||||
############################################################################
|
||||
@@ -266,9 +296,36 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
|
||||
def _set_config(self, value: Any, data: Any):
    """Forward a changed setting to the dialog's configuration object.

    Calls ``on_config_model`` with this box's model key, the new *value*
    and the setting identifier *data*.
    """
    configuration = self._config_dialog.config
    configuration.on_config_model(self._model, value, data)
|
||||
|
||||
def _add_prefs(self, prefs: list[tuple[str, type[PreferenceBox]]]):
    """Build each preference widget and place it in its UI container.

    Every ``(ui_name, klass)`` pair yields one ``klass(self)`` instance,
    which is added to the ``self._ui`` attribute named ``ui_name`` and
    stored in ``self._prefs`` under the same name.
    """
    for name, pref_class in prefs:
        container = getattr(self._ui, name)
        widget = pref_class(self)  # pyright: ignore
        container.add(widget)
        self._prefs[name] = widget
|
||||
############################################################################
|
||||
# Faster Whisper Settings
|
||||
############################################################################
|
||||
class FasterWhisperGeneral(PreferenceBox):
    """Preference box holding the general Faster Whisper options.

    It offers a popover for the model size and a switch for translation.
    Both settings report changes through ``_set_config``.
    """

    def __init__(self,
                 config_dialog: STTVoiceMessagesConfigDialog) -> None:
        self._model = 'model_faster-whisper'
        self._config_dialog = config_dialog

        # Current values shown in the widgets come from the stored config
        # of this model.
        model_config = config_dialog.config.available_models[
            self._model].config

        model_size = Setting(
            SettingKind.POPOVER,
            _('Language Model Size'),
            SettingType.VALUE,
            value=model_config.model_size,
            data='model_size',
            callback=self._set_config,
            props={'entries': fwhisper.available_models()})

        translate = Setting(
            SettingKind.SWITCH,
            _('Translate'),
            SettingType.VALUE,
            value=model_config.translate_to_english,
            data='translate_to_english',
            callback=self._set_config)

        PreferenceBox.__init__(self, [model_size, translate])

    def _set_config(self, value: Any, data: Any):
        """Pass a changed setting on to the dialog's configuration."""
        configuration = self._config_dialog.config
        configuration.on_config_model(self._model, value, data)
|
||||
|
||||
@@ -127,14 +127,15 @@
|
||||
</packing>
|
||||
</child>
|
||||
<child>
|
||||
<object class="GtkScrolledWindow">
|
||||
<object class="GtkScrolledWindow" id="openai-whisper">
|
||||
<property name="name">openai-whisper</property>
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">True</property>
|
||||
<property name="hscrollbar-policy">never</property>
|
||||
<property name="shadow-type">in</property>
|
||||
<property name="overlay-scrolling">False</property>
|
||||
<child>
|
||||
<object class="GtkViewport">
|
||||
<object class="GtkViewport" id="openai-whisper-viewport">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<child>
|
||||
@@ -145,7 +146,7 @@
|
||||
<property name="spacing">24</property>
|
||||
<child>
|
||||
<!-- n-columns=3 n-rows=3 -->
|
||||
<object class="GtkGrid" id="whisper_general">
|
||||
<object class="GtkGrid" id="openaiwhisper_general">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="orientation">vertical</property>
|
||||
@@ -220,19 +221,97 @@
|
||||
</packing>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
<object class="GtkScrolledWindow" id="faster-whisper">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">True</property>
|
||||
<property name="hscrollbar-policy">never</property>
|
||||
<property name="shadow-type">in</property>
|
||||
<property name="overlay-scrolling">False</property>
|
||||
<child>
|
||||
<object class="GtkViewport">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<child>
|
||||
<object class="GtkBox">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="orientation">vertical</property>
|
||||
<property name="spacing">24</property>
|
||||
<child>
|
||||
<!-- n-columns=3 n-rows=3 -->
|
||||
<object class="GtkGrid" id="fasterwhisper_general">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="orientation">vertical</property>
|
||||
<property name="row-spacing">12</property>
|
||||
<child>
|
||||
<object class="GtkLabel">
|
||||
<property name="visible">True</property>
|
||||
<property name="can-focus">False</property>
|
||||
<property name="label" translatable="yes">General</property>
|
||||
<property name="xalign">0</property>
|
||||
<style>
|
||||
<class name="bold"/>
|
||||
</style>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="left-attach">0</property>
|
||||
<property name="top-attach">0</property>
|
||||
</packing>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="expand">False</property>
|
||||
<property name="fill">True</property>
|
||||
<property name="position">0</property>
|
||||
</packing>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
<child>
|
||||
<placeholder/>
|
||||
</child>
|
||||
</object>
|
||||
</child>
|
||||
</object>
|
||||
</child>
|
||||
</object>
|
||||
<packing>
|
||||
<property name="name">faster-whisper</property>
|
||||
<property name="title" translatable="yes">Faster Whisper</property>
|
||||
<property name="position">2</property>
|
||||
</packing>
|
||||
</child>
|
||||
<style>
|
||||
<class name="settings-stack"/>
|
||||
|
||||
60
stt_voice_messages/models/faster_whisper.py
Normal file
60
stt_voice_messages/models/faster_whisper.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# This file is part of Gajim.
|
||||
#
|
||||
# Gajim is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Gajim is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import logging
|
||||
import typing
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from ..helper import Results
|
||||
from .model_settings import FasterWhisperSettings
|
||||
from .model_template import Model
|
||||
|
||||
log = logging.getLogger('gajim.p.sttvm_faster_whisper')
|
||||
|
||||
try:
|
||||
import faster_whisper
|
||||
except ModuleNotFoundError:
|
||||
if typing.TYPE_CHECKING:
|
||||
import faster_whisper
|
||||
|
||||
@dataclass
class Configuration:
    """Configuration record with a single ``model_size`` string."""

    # No default is given, so the value must be supplied on construction.
    model_size: str
|
||||
|
||||
class FasterWhisperModel(Model):
    """Speech-to-text model backed by the ``faster_whisper`` package."""

    def __init__(self):
        self._result: str = ''
        self._config = FasterWhisperSettings()

    @property
    def result(self) -> str:
        """Return the stored result string (set to ``''`` on creation)."""
        return self._result

    def transcribe(self, result: Results, audio_file: Path) -> None:
        """Transcribe *audio_file* and store the text in ``result.text``.

        A new ``faster_whisper.WhisperModel`` is loaded on every call with
        the configured model size. The texts of all returned segments are
        concatenated in order.
        """
        model = faster_whisper.WhisperModel(self._config.model_size,
                                            compute_type='float32')
        log.debug('model size is used = %s', self._config.model_size)

        # Honour the "Translate" setting; previously it was never passed on.
        if self._config.translate_to_english:
            task = 'translate'
        else:
            task = 'transcribe'

        # The backend documents str paths, file objects and arrays as
        # input, so hand over the path as a string.
        segments, _ = model.transcribe(str(audio_file), task=task)
        # Materialise the segments so they can be logged and joined.
        segments = list(segments)
        log.debug('segments = %s', segments)
        for segment in segments:
            # Log through the plugin logger instead of printing to stdout.
            log.debug('[%.2fs -> %.2fs] %s',
                      segment.start, segment.end, segment.text)

        result.text = ''.join(segment.text for segment in segments)

    def set_config(self, config: FasterWhisperSettings) -> None:
        """Replace the settings used by later ``transcribe`` calls."""
        self._config = config
|
||||
|
||||
@@ -19,5 +19,10 @@ from dataclasses import dataclass, field
|
||||
|
||||
@dataclass
class OpenAIWhisperSettings:
    """Settings for the OpenAI Whisper model."""

    # Model size name; defaults to 'tiny'.
    model_size: str = 'tiny'
    # Translation flag; off by default.
    translate_to_english: bool = False
|
||||
|
||||
@dataclass
class FasterWhisperSettings:
    """Settings for the Faster Whisper model."""

    # Model size name; defaults to 'tiny'.
    model_size: str = 'tiny'
    # Translation flag; off by default.
    translate_to_english: bool = False
|
||||
@@ -15,6 +15,7 @@
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from ..helper import Results
|
||||
|
||||
@@ -22,5 +23,9 @@ from ..helper import Results
|
||||
class Model(ABC):
|
||||
|
||||
@abstractmethod
|
||||
def transcribe(self, result: Results, audio_file: Path) -> str:
|
||||
return ''
|
||||
def transcribe(self, result: Results, audio_file: Path) -> None:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def set_config(self, config: Any) -> None:
|
||||
pass
|
||||
|
||||
@@ -37,7 +37,6 @@ class Configuration:
|
||||
|
||||
class WhisperModel(Model):
|
||||
def __init__(self):
|
||||
# TODO
|
||||
self._result: str = ''
|
||||
self._config = OpenAIWhisperSettings()
|
||||
|
||||
@@ -45,7 +44,7 @@ class WhisperModel(Model):
|
||||
def result(self) -> str:
|
||||
return self._result
|
||||
|
||||
def transcribe(self, result: Results, audio_file: Path) -> str:
|
||||
def transcribe(self, result: Results, audio_file: Path) -> None:
|
||||
model = whisper.load_model(self._config.model_size)
|
||||
log.debug('model size is used = %s', self._config.model_size)
|
||||
result.text = model.transcribe(audio_file)['text'] # pyright: ignore [reportAttributeAccessIssue]
|
||||
|
||||
@@ -38,6 +38,11 @@ class STTVoiceMessagesPlugin(GajimPlugin):
|
||||
OpenAIWhisperSettings(
|
||||
model_size='tiny',
|
||||
translate_to_english=False),
|
||||
''),
|
||||
'model_faster-whisper': (
|
||||
FasterWhisperSettings(
|
||||
model_size='tiny',
|
||||
translate_to_english=False),
|
||||
'')
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user