Make Faster Whisper and OpenAI Whisper work
This commit is contained in:
@@ -15,13 +15,23 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from dataclasses import asdict
|
|
||||||
import logging
|
import logging
|
||||||
import typing
|
import typing
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
import whisper
|
try:
|
||||||
|
import whisper
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
if typing.TYPE_CHECKING:
|
||||||
|
import whisper
|
||||||
|
|
||||||
|
try:
|
||||||
|
import faster_whisper as fwhisper
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
if typing.TYPE_CHECKING:
|
||||||
|
import faster_whisper as fwhisper
|
||||||
|
|
||||||
from gi.repository import Gtk
|
from gi.repository import Gtk
|
||||||
|
|
||||||
from gajim.common import app
|
from gajim.common import app
|
||||||
@@ -33,7 +43,7 @@ from gajim.gtk.sidebar_switcher import SideBarSwitcher
|
|||||||
from gajim.plugins.helpers import get_builder
|
from gajim.plugins.helpers import get_builder
|
||||||
from gajim.plugins.plugins_i18n import _
|
from gajim.plugins.plugins_i18n import _
|
||||||
|
|
||||||
from ..models import openai_whisper
|
from ..models import faster_whisper, openai_whisper
|
||||||
from ..models.model_settings import *
|
from ..models.model_settings import *
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
@@ -56,18 +66,10 @@ SUPPORTED_MODELS: dict[str, Model] = {
|
|||||||
['whisper'],
|
['whisper'],
|
||||||
openai_whisper.WhisperModel,
|
openai_whisper.WhisperModel,
|
||||||
OpenAIWhisperSettings),
|
OpenAIWhisperSettings),
|
||||||
'model_ctranslate2': Model('CTranslate2',
|
'model_faster-whisper': Model('Faster-Whisper',
|
||||||
['ctranslate2'],
|
['faster_whisper'],
|
||||||
None,
|
faster_whisper.FasterWhisperModel,
|
||||||
None),
|
FasterWhisperSettings)
|
||||||
'model_faster-whisper': Model('Fast-Whisper',
|
|
||||||
['faster-whisper'],
|
|
||||||
None,
|
|
||||||
None),
|
|
||||||
'model_distill': Model('Distill',
|
|
||||||
['transformers', 'accelerate', 'datasets[audio]'],
|
|
||||||
None,
|
|
||||||
None)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -78,7 +80,7 @@ class Configuration:
|
|||||||
self._available_models: dict[str, Model] = {}
|
self._available_models: dict[str, Model] = {}
|
||||||
self.check_available_moduls()
|
self.check_available_moduls()
|
||||||
|
|
||||||
log.debug('config = %s', self._plugin.config['model_openaiwhisper'])
|
log.debug('config = %s', self._plugin.config)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def plugin(self) -> STTVoiceMessagesPlugin:
|
def plugin(self) -> STTVoiceMessagesPlugin:
|
||||||
@@ -106,20 +108,20 @@ class Configuration:
|
|||||||
|
|
||||||
self._plugin.config.data[model].instance.set_config(self.plugin.config.data[model])
|
self._plugin.config.data[model].instance.set_config(self.plugin.config.data[model])
|
||||||
|
|
||||||
def on_set_model(self, model: Any) -> None:
|
def create_model(self, model: Any) -> None:
|
||||||
if isinstance(model, str):
|
|
||||||
model.strip()
|
|
||||||
log.debug('plugin config before:\n %s', self.plugin.config.data)
|
|
||||||
|
|
||||||
if (self.plugin.config.data[model].instance is None and
|
if (self.plugin.config.data[model].instance is None and
|
||||||
self._available_models[model].klass is not None):
|
self._available_models[model].klass is not None):
|
||||||
self.plugin.config.data[model].instance = \
|
self.plugin.config.data[model].instance = \
|
||||||
self._available_models[model].klass()
|
self._available_models[model].klass()
|
||||||
else:
|
else:
|
||||||
return
|
log.debug('Could not create model %s', model)
|
||||||
|
|
||||||
|
def on_set_model(self, model: Any, data: str = 'model') -> None:
|
||||||
|
if isinstance(model, str):
|
||||||
|
model.strip()
|
||||||
|
|
||||||
self.plugin.config['model'] = model
|
self.plugin.config['model'] = model
|
||||||
log.debug('plugin config after:\n %s', self.plugin.config.data)
|
log.debug('Created model %s with config %s', model, self.plugin.config.data[model])
|
||||||
|
|
||||||
def check_available_moduls(self):
|
def check_available_moduls(self):
|
||||||
def is_module_available(module: str) -> bool:
|
def is_module_available(module: str) -> bool:
|
||||||
@@ -146,6 +148,7 @@ class Configuration:
|
|||||||
log.debug('plugin config for model = %s', self.plugin.config[model])
|
log.debug('plugin config for model = %s', self.plugin.config[model])
|
||||||
self.plugin.config.data[model].instance = None
|
self.plugin.config.data[model].instance = None
|
||||||
self._available_models[model].config = self.plugin.config[model]
|
self._available_models[model].config = self.plugin.config[model]
|
||||||
|
self.create_model(model)
|
||||||
|
|
||||||
self.on_set_model(self._plugin.config['model'])
|
self.on_set_model(self._plugin.config['model'])
|
||||||
|
|
||||||
@@ -192,12 +195,39 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
|
|||||||
prefs: list[tuple[str, type[PreferenceBox]]] = [
|
prefs: list[tuple[str, type[PreferenceBox]]] = [
|
||||||
('stt_behaviour', self.STTBehaviour),
|
('stt_behaviour', self.STTBehaviour),
|
||||||
('models', self.Models),
|
('models', self.Models),
|
||||||
('whisper_general', self.OpenAIWhisperGeneral),
|
|
||||||
]
|
]
|
||||||
self._add_prefs(prefs)
|
|
||||||
|
|
||||||
|
# TODO: Refactor this
|
||||||
|
if 'model_openaiwhisper' in config.available_models:
|
||||||
|
prefs.append(('openaiwhisper_general', self.OpenAIWhisperGeneral))
|
||||||
|
else:
|
||||||
|
self._disable_pref('openai-whisper-viewport') # does not work yet
|
||||||
|
|
||||||
|
if 'model_faster-whisper' in config.available_models:
|
||||||
|
prefs.append(('fasterwhisper_general', self.FasterWhisperGeneral))
|
||||||
|
else:
|
||||||
|
self._disable_pref('faster-whisper') # does not work yet
|
||||||
|
|
||||||
|
self._add_prefs(prefs)
|
||||||
self.show_all()
|
self.show_all()
|
||||||
|
|
||||||
|
def _add_prefs(self, prefs: list[tuple[str, type[PreferenceBox]]]):
    """Instantiate each preference box and attach it to its UI container.

    :param prefs: pairs of (attribute name on ``self._ui``, preference
        box class). The class is called with this dialog as its only
        argument.

    Each created box is added to the looked-up container and stored in
    ``self._prefs`` under the same name.
    """
    for entry in prefs:
        container_name, box_class = entry
        # Look the container up first so a wrong name fails before the
        # preference box is constructed.
        container = getattr(self._ui, container_name)
        box = box_class(self)  # pyright: ignore
        log.debug('ui_name = %s, klass = %s, pref_box = %s',
                  container_name, box_class, container)
        container.add(box)
        self._prefs[container_name] = box
|
||||||
|
|
||||||
|
def _disable_pref(self, pref: str):
    """Try to disable the settings page whose widget is named *pref*.

    :param pref: attribute name of the widget on ``self._ui``.

    The widget gets one shared ``Gtk.Adjustment`` built from three zeros
    as both its horizontal and vertical focus adjustment.
    """
    # TODO: Preventing the view from scrolling to this setting does not
    # work yet.
    page_widget = getattr(self._ui, pref)
    log.debug('Disable Settings Page for %s', page_widget)
    zero_adjustment = Gtk.Adjustment(0, 0, 0)
    page_widget.set_focus_hadjustment(zero_adjustment)
    page_widget.set_focus_vadjustment(zero_adjustment)
|
||||||
|
|
||||||
|
|
||||||
############################################################################
|
############################################################################
|
||||||
# General Settings
|
# General Settings
|
||||||
############################################################################
|
############################################################################
|
||||||
@@ -266,9 +296,36 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
|
|||||||
def _set_config(self, value: Any, data: Any):
|
def _set_config(self, value: Any, data: Any):
|
||||||
self._config_dialog.config.on_config_model(self._model, value, data)
|
self._config_dialog.config.on_config_model(self._model, value, data)
|
||||||
|
|
||||||
def _add_prefs(self, prefs: list[tuple[str, type[PreferenceBox]]]):
|
############################################################################
|
||||||
for ui_name, klass in prefs:
|
# Faster Whisper Settings
|
||||||
pref_box = getattr(self._ui, ui_name)
|
############################################################################
|
||||||
pref = klass(self) # pyright: ignore
|
class FasterWhisperGeneral(PreferenceBox):
|
||||||
pref_box.add(pref)
|
def __init__(self,
|
||||||
self._prefs[ui_name] = pref
|
config_dialog: STTVoiceMessagesConfigDialog) -> None:
|
||||||
|
self._model = 'model_faster-whisper'
|
||||||
|
self._config_dialog = config_dialog
|
||||||
|
|
||||||
|
settings = [
|
||||||
|
Setting(SettingKind.POPOVER,
|
||||||
|
_('Language Model Size'),
|
||||||
|
SettingType.VALUE,
|
||||||
|
value=config_dialog.config.available_models[
|
||||||
|
self._model].config.model_size,
|
||||||
|
data='model_size',
|
||||||
|
callback=self._set_config,
|
||||||
|
props={'entries': fwhisper.available_models()}),
|
||||||
|
|
||||||
|
Setting(SettingKind.SWITCH,
|
||||||
|
_('Translate'),
|
||||||
|
SettingType.VALUE,
|
||||||
|
value=config_dialog.config.available_models[
|
||||||
|
self._model].config.translate_to_english,
|
||||||
|
data='translate_to_english',
|
||||||
|
callback=self._set_config)
|
||||||
|
]
|
||||||
|
|
||||||
|
PreferenceBox.__init__(self, settings)
|
||||||
|
|
||||||
|
def _set_config(self, value: Any, data: Any):
|
||||||
|
self._config_dialog.config.on_config_model(self._model, value,
|
||||||
|
data)
|
||||||
|
|||||||
@@ -127,14 +127,15 @@
|
|||||||
</packing>
|
</packing>
|
||||||
</child>
|
</child>
|
||||||
<child>
|
<child>
|
||||||
<object class="GtkScrolledWindow">
|
<object class="GtkScrolledWindow" id="openai-whisper">
|
||||||
|
<property name="name">openai-whisper</property>
|
||||||
<property name="visible">True</property>
|
<property name="visible">True</property>
|
||||||
<property name="can-focus">True</property>
|
<property name="can-focus">True</property>
|
||||||
<property name="hscrollbar-policy">never</property>
|
<property name="hscrollbar-policy">never</property>
|
||||||
<property name="shadow-type">in</property>
|
<property name="shadow-type">in</property>
|
||||||
<property name="overlay-scrolling">False</property>
|
<property name="overlay-scrolling">False</property>
|
||||||
<child>
|
<child>
|
||||||
<object class="GtkViewport">
|
<object class="GtkViewport" id="openai-whisper-viewport">
|
||||||
<property name="visible">True</property>
|
<property name="visible">True</property>
|
||||||
<property name="can-focus">False</property>
|
<property name="can-focus">False</property>
|
||||||
<child>
|
<child>
|
||||||
@@ -145,7 +146,7 @@
|
|||||||
<property name="spacing">24</property>
|
<property name="spacing">24</property>
|
||||||
<child>
|
<child>
|
||||||
<!-- n-columns=3 n-rows=3 -->
|
<!-- n-columns=3 n-rows=3 -->
|
||||||
<object class="GtkGrid" id="whisper_general">
|
<object class="GtkGrid" id="openaiwhisper_general">
|
||||||
<property name="visible">True</property>
|
<property name="visible">True</property>
|
||||||
<property name="can-focus">False</property>
|
<property name="can-focus">False</property>
|
||||||
<property name="orientation">vertical</property>
|
<property name="orientation">vertical</property>
|
||||||
@@ -220,19 +221,97 @@
|
|||||||
</packing>
|
</packing>
|
||||||
</child>
|
</child>
|
||||||
<child>
|
<child>
|
||||||
<placeholder/>
|
<object class="GtkScrolledWindow" id="faster-whisper">
|
||||||
</child>
|
<property name="visible">True</property>
|
||||||
<child>
|
<property name="can-focus">True</property>
|
||||||
<placeholder/>
|
<property name="hscrollbar-policy">never</property>
|
||||||
</child>
|
<property name="shadow-type">in</property>
|
||||||
<child>
|
<property name="overlay-scrolling">False</property>
|
||||||
<placeholder/>
|
<child>
|
||||||
</child>
|
<object class="GtkViewport">
|
||||||
<child>
|
<property name="visible">True</property>
|
||||||
<placeholder/>
|
<property name="can-focus">False</property>
|
||||||
</child>
|
<child>
|
||||||
<child>
|
<object class="GtkBox">
|
||||||
<placeholder/>
|
<property name="visible">True</property>
|
||||||
|
<property name="can-focus">False</property>
|
||||||
|
<property name="orientation">vertical</property>
|
||||||
|
<property name="spacing">24</property>
|
||||||
|
<child>
|
||||||
|
<!-- n-columns=3 n-rows=3 -->
|
||||||
|
<object class="GtkGrid" id="fasterwhisper_general">
|
||||||
|
<property name="visible">True</property>
|
||||||
|
<property name="can-focus">False</property>
|
||||||
|
<property name="orientation">vertical</property>
|
||||||
|
<property name="row-spacing">12</property>
|
||||||
|
<child>
|
||||||
|
<object class="GtkLabel">
|
||||||
|
<property name="visible">True</property>
|
||||||
|
<property name="can-focus">False</property>
|
||||||
|
<property name="label" translatable="yes">General</property>
|
||||||
|
<property name="xalign">0</property>
|
||||||
|
<style>
|
||||||
|
<class name="bold"/>
|
||||||
|
</style>
|
||||||
|
</object>
|
||||||
|
<packing>
|
||||||
|
<property name="left-attach">0</property>
|
||||||
|
<property name="top-attach">0</property>
|
||||||
|
</packing>
|
||||||
|
</child>
|
||||||
|
<child>
|
||||||
|
<placeholder/>
|
||||||
|
</child>
|
||||||
|
<child>
|
||||||
|
<placeholder/>
|
||||||
|
</child>
|
||||||
|
<child>
|
||||||
|
<placeholder/>
|
||||||
|
</child>
|
||||||
|
<child>
|
||||||
|
<placeholder/>
|
||||||
|
</child>
|
||||||
|
<child>
|
||||||
|
<placeholder/>
|
||||||
|
</child>
|
||||||
|
<child>
|
||||||
|
<placeholder/>
|
||||||
|
</child>
|
||||||
|
<child>
|
||||||
|
<placeholder/>
|
||||||
|
</child>
|
||||||
|
<child>
|
||||||
|
<placeholder/>
|
||||||
|
</child>
|
||||||
|
</object>
|
||||||
|
<packing>
|
||||||
|
<property name="expand">False</property>
|
||||||
|
<property name="fill">True</property>
|
||||||
|
<property name="position">0</property>
|
||||||
|
</packing>
|
||||||
|
</child>
|
||||||
|
<child>
|
||||||
|
<placeholder/>
|
||||||
|
</child>
|
||||||
|
<child>
|
||||||
|
<placeholder/>
|
||||||
|
</child>
|
||||||
|
<child>
|
||||||
|
<placeholder/>
|
||||||
|
</child>
|
||||||
|
<child>
|
||||||
|
<placeholder/>
|
||||||
|
</child>
|
||||||
|
</object>
|
||||||
|
</child>
|
||||||
|
</object>
|
||||||
|
</child>
|
||||||
|
</object>
|
||||||
|
<packing>
|
||||||
|
<property name="name">faster-whisper</property>
|
||||||
|
<property name="title" translatable="yes">Faster Whisper</property>
|
||||||
|
<property name="position">2</property>
|
||||||
|
</packing>
|
||||||
</child>
|
</child>
|
||||||
<style>
|
<style>
|
||||||
<class name="settings-stack"/>
|
<class name="settings-stack"/>
|
||||||
|
|||||||
60
stt_voice_messages/models/faster_whisper.py
Normal file
60
stt_voice_messages/models/faster_whisper.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
# This file is part of Gajim.
|
||||||
|
#
|
||||||
|
# Gajim is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# Gajim is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import typing
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from ..helper import Results
|
||||||
|
from .model_settings import FasterWhisperSettings
|
||||||
|
from .model_template import Model
|
||||||
|
|
||||||
|
log = logging.getLogger('gajim.p.sttvm_faster_whisper')
|
||||||
|
|
||||||
|
try:
|
||||||
|
import faster_whisper
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
if typing.TYPE_CHECKING:
|
||||||
|
import faster_whisper
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Configuration:
|
||||||
|
model_size: str
|
||||||
|
|
||||||
|
class FasterWhisperModel(Model):
    """Speech-to-text model backed by the ``faster_whisper`` package."""

    def __init__(self):
        self._result: str = ''
        # Start with default settings; set_config() replaces them.
        self._config = FasterWhisperSettings()

    @property
    def result(self) -> str:
        """Return the stored result string (initialised to ``''``)."""
        return self._result

    def transcribe(self, result: Results, audio_file: Path) -> None:
        """Transcribe *audio_file* and store the text in ``result.text``.

        A ``faster_whisper.WhisperModel`` of the configured size is loaded
        on every call. When ``translate_to_english`` is enabled in the
        current settings, the backend is asked to translate instead of
        transcribe.

        :param result: object whose ``text`` attribute receives the
            concatenated text of all segments.
        :param audio_file: path of the audio file to process.
        """
        model = faster_whisper.WhisperModel(self._config.model_size,
                                            compute_type="float32")
        log.debug('model size is used = %s', self._config.model_size)

        # Honour the "Translate" switch of the settings dialog; it was
        # previously ignored by this backend.
        if self._config.translate_to_english:
            task = 'translate'
        else:
            task = 'transcribe'

        # faster-whisper documents str, file object or ndarray as input,
        # so hand over the path as a plain string.
        segments, _info = model.transcribe(str(audio_file), task=task)

        # The backend yields segments lazily; materialising the list is
        # what actually runs the transcription.
        segments = list(segments)
        log.debug('segments = %s', segments)
        for segment in segments:
            log.debug('[%.2fs -> %.2fs] %s',
                      segment.start, segment.end, segment.text)

        result.text = ''.join(segment.text for segment in segments)

    def set_config(self, config: FasterWhisperSettings) -> None:
        """Replace the settings used by subsequent transcribe() calls."""
        self._config = config
|
||||||
|
|
||||||
@@ -19,5 +19,10 @@ from dataclasses import dataclass, field
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class OpenAIWhisperSettings:
|
class OpenAIWhisperSettings:
|
||||||
|
model_size: str = field(default='tiny', init=True)
|
||||||
|
translate_to_english: bool = field(default=False, init=True)
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class FasterWhisperSettings:
|
||||||
model_size: str = field(default='tiny', init=True)
|
model_size: str = field(default='tiny', init=True)
|
||||||
translate_to_english: bool = field(default=False, init=True)
|
translate_to_english: bool = field(default=False, init=True)
|
||||||
@@ -15,6 +15,7 @@
|
|||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
from ..helper import Results
|
from ..helper import Results
|
||||||
|
|
||||||
@@ -22,5 +23,9 @@ from ..helper import Results
|
|||||||
class Model(ABC):
|
class Model(ABC):
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def transcribe(self, result: Results, audio_file: Path) -> str:
|
def transcribe(self, result: Results, audio_file: Path) -> None:
|
||||||
return ''
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def set_config(self, config: Any) -> None:
|
||||||
|
pass
|
||||||
|
|||||||
@@ -37,7 +37,6 @@ class Configuration:
|
|||||||
|
|
||||||
class WhisperModel(Model):
|
class WhisperModel(Model):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
# TODO
|
|
||||||
self._result: str = ''
|
self._result: str = ''
|
||||||
self._config = OpenAIWhisperSettings()
|
self._config = OpenAIWhisperSettings()
|
||||||
|
|
||||||
@@ -45,7 +44,7 @@ class WhisperModel(Model):
|
|||||||
def result(self) -> str:
|
def result(self) -> str:
|
||||||
return self._result
|
return self._result
|
||||||
|
|
||||||
def transcribe(self, result: Results, audio_file: Path) -> str:
|
def transcribe(self, result: Results, audio_file: Path) -> None:
|
||||||
model = whisper.load_model(self._config.model_size)
|
model = whisper.load_model(self._config.model_size)
|
||||||
log.debug('model size is used = %s', self._config.model_size)
|
log.debug('model size is used = %s', self._config.model_size)
|
||||||
result.text = model.transcribe(audio_file)['text'] # pyright: ignore [reportAttributeAccessIssue]
|
result.text = model.transcribe(audio_file)['text'] # pyright: ignore [reportAttributeAccessIssue]
|
||||||
|
|||||||
@@ -38,6 +38,11 @@ class STTVoiceMessagesPlugin(GajimPlugin):
|
|||||||
OpenAIWhisperSettings(
|
OpenAIWhisperSettings(
|
||||||
model_size='tiny',
|
model_size='tiny',
|
||||||
translate_to_english=False),
|
translate_to_english=False),
|
||||||
|
''),
|
||||||
|
'model_faster-whisper': (
|
||||||
|
FasterWhisperSettings(
|
||||||
|
model_size='tiny',
|
||||||
|
translate_to_english=False),
|
||||||
'')
|
'')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user