Make Faster Whisper and OpenAI Whisper work

This commit is contained in:
mesonium
2024-07-03 18:43:27 +02:00
committed by hueso
parent b0adecef7a
commit 2d7630a757
7 changed files with 261 additions and 51 deletions

View File

@@ -15,13 +15,23 @@
from __future__ import annotations from __future__ import annotations
from dataclasses import asdict
import logging import logging
import typing import typing
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
import whisper try:
import whisper
except ModuleNotFoundError:
if typing.TYPE_CHECKING:
import whisper
try:
import faster_whisper as fwhisper
except ModuleNotFoundError:
if typing.TYPE_CHECKING:
import faster_whisper as fwhisper
from gi.repository import Gtk from gi.repository import Gtk
from gajim.common import app from gajim.common import app
@@ -33,7 +43,7 @@ from gajim.gtk.sidebar_switcher import SideBarSwitcher
from gajim.plugins.helpers import get_builder from gajim.plugins.helpers import get_builder
from gajim.plugins.plugins_i18n import _ from gajim.plugins.plugins_i18n import _
from ..models import openai_whisper from ..models import faster_whisper, openai_whisper
from ..models.model_settings import * from ..models.model_settings import *
if TYPE_CHECKING: if TYPE_CHECKING:
@@ -56,18 +66,10 @@ SUPPORTED_MODELS: dict[str, Model] = {
['whisper'], ['whisper'],
openai_whisper.WhisperModel, openai_whisper.WhisperModel,
OpenAIWhisperSettings), OpenAIWhisperSettings),
'model_ctranslate2': Model('CTranslate2', 'model_faster-whisper': Model('Faster-Whisper',
['ctranslate2'], ['faster_whisper'],
None, faster_whisper.FasterWhisperModel,
None), FasterWhisperSettings)
'model_faster-whisper': Model('Fast-Whisper',
['faster-whisper'],
None,
None),
'model_distill': Model('Distill',
['transformers', 'accelerate', 'datasets[audio]'],
None,
None)
} }
@@ -78,7 +80,7 @@ class Configuration:
self._available_models: dict[str, Model] = {} self._available_models: dict[str, Model] = {}
self.check_available_moduls() self.check_available_moduls()
log.debug('config = %s', self._plugin.config['model_openaiwhisper']) log.debug('config = %s', self._plugin.config)
@property @property
def plugin(self) -> STTVoiceMessagesPlugin: def plugin(self) -> STTVoiceMessagesPlugin:
@@ -106,20 +108,20 @@ class Configuration:
self._plugin.config.data[model].instance.set_config(self.plugin.config.data[model]) self._plugin.config.data[model].instance.set_config(self.plugin.config.data[model])
def on_set_model(self, model: Any) -> None: def create_model(self, model: Any) -> None:
if isinstance(model, str):
model.strip()
log.debug('plugin config before:\n %s', self.plugin.config.data)
if (self.plugin.config.data[model].instance is None and if (self.plugin.config.data[model].instance is None and
self._available_models[model].klass is not None): self._available_models[model].klass is not None):
self.plugin.config.data[model].instance = \ self.plugin.config.data[model].instance = \
self._available_models[model].klass() self._available_models[model].klass()
else: else:
return log.debug('Could not create model %s', model)
def on_set_model(self, model: Any, data: str = 'model') -> None:
if isinstance(model, str):
model.strip()
self.plugin.config['model'] = model self.plugin.config['model'] = model
log.debug('plugin config after:\n %s', self.plugin.config.data) log.debug('Created model %s with config %s', model, self.plugin.config.data[model])
def check_available_moduls(self): def check_available_moduls(self):
def is_module_available(module: str) -> bool: def is_module_available(module: str) -> bool:
@@ -146,6 +148,7 @@ class Configuration:
log.debug('plugin config for model = %s', self.plugin.config[model]) log.debug('plugin config for model = %s', self.plugin.config[model])
self.plugin.config.data[model].instance = None self.plugin.config.data[model].instance = None
self._available_models[model].config = self.plugin.config[model] self._available_models[model].config = self.plugin.config[model]
self.create_model(model)
self.on_set_model(self._plugin.config['model']) self.on_set_model(self._plugin.config['model'])
@@ -192,12 +195,39 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
prefs: list[tuple[str, type[PreferenceBox]]] = [ prefs: list[tuple[str, type[PreferenceBox]]] = [
('stt_behaviour', self.STTBehaviour), ('stt_behaviour', self.STTBehaviour),
('models', self.Models), ('models', self.Models),
('whisper_general', self.OpenAIWhisperGeneral),
] ]
self._add_prefs(prefs)
# TODO: Refactor this
if 'model_openaiwhisper' in config.available_models:
prefs.append(('openaiwhisper_general', self.OpenAIWhisperGeneral))
else:
self._disable_pref('openai-whisper-viewport') # does not work yet
if 'model_faster-whisper' in config.available_models:
prefs.append(('fasterwhisper_general', self.FasterWhisperGeneral))
else:
self._disable_pref('faster-whisper') # does not work yet
self._add_prefs(prefs)
self.show_all() self.show_all()
def _add_prefs(self, prefs: list[tuple[str, type[PreferenceBox]]]):
    """Build every listed preference box and attach it to its UI container.

    :param prefs: ``(ui_name, klass)`` pairs. ``ui_name`` is looked up as
        an attribute of ``self._ui``; ``klass`` is called with this dialog
        as its only argument. The created box is also stored in
        ``self._prefs`` under ``ui_name``.
    """
    for name, box_class in prefs:
        container = getattr(self._ui, name)
        box = box_class(self)  # pyright: ignore
        log.debug('ui_name = %s, klass = %s, pref_box = %s',
                  name, box_class, container)
        container.add(box)
        self._prefs[name] = box
def _disable_pref(self, pref: str):
    """Install an all-zero focus adjustment on the UI widget named ``pref``.

    The widget is looked up as an attribute of ``self._ui`` and the same
    ``Gtk.Adjustment(0, 0, 0)`` is set as both its horizontal and its
    vertical focus adjustment.

    :param pref: attribute name of the widget on ``self._ui``.
    """
    # TODO: Not scrolling to setting does not work!
    widget = getattr(self._ui, pref)
    log.debug('Disable Settings Page for %s', widget)
    zero_adjustment = Gtk.Adjustment(0, 0, 0)
    for setter_name in ('set_focus_hadjustment', 'set_focus_vadjustment'):
        getattr(widget, setter_name)(zero_adjustment)
############################################################################ ############################################################################
# General Settings # General Settings
############################################################################ ############################################################################
@@ -266,9 +296,36 @@ class STTVoiceMessagesConfigDialog(Gtk.ApplicationWindow):
def _set_config(self, value: Any, data: Any): def _set_config(self, value: Any, data: Any):
self._config_dialog.config.on_config_model(self._model, value, data) self._config_dialog.config.on_config_model(self._model, value, data)
def _add_prefs(self, prefs: list[tuple[str, type[PreferenceBox]]]): ############################################################################
for ui_name, klass in prefs: # Faster Whisper Settings
pref_box = getattr(self._ui, ui_name) ############################################################################
pref = klass(self) # pyright: ignore class FasterWhisperGeneral(PreferenceBox):
pref_box.add(pref) def __init__(self,
self._prefs[ui_name] = pref config_dialog: STTVoiceMessagesConfigDialog) -> None:
self._model = 'model_faster-whisper'
self._config_dialog = config_dialog
settings = [
Setting(SettingKind.POPOVER,
_('Language Model Size'),
SettingType.VALUE,
value=config_dialog.config.available_models[
self._model].config.model_size,
data='model_size',
callback=self._set_config,
props={'entries': fwhisper.available_models()}),
Setting(SettingKind.SWITCH,
_('Translate'),
SettingType.VALUE,
value=config_dialog.config.available_models[
self._model].config.translate_to_english,
data='translate_to_english',
callback=self._set_config)
]
PreferenceBox.__init__(self, settings)
def _set_config(self, value: Any, data: Any):
self._config_dialog.config.on_config_model(self._model, value,
data)

View File

@@ -127,14 +127,15 @@
</packing> </packing>
</child> </child>
<child> <child>
<object class="GtkScrolledWindow"> <object class="GtkScrolledWindow" id="openai-whisper">
<property name="name">openai-whisper</property>
<property name="visible">True</property> <property name="visible">True</property>
<property name="can-focus">True</property> <property name="can-focus">True</property>
<property name="hscrollbar-policy">never</property> <property name="hscrollbar-policy">never</property>
<property name="shadow-type">in</property> <property name="shadow-type">in</property>
<property name="overlay-scrolling">False</property> <property name="overlay-scrolling">False</property>
<child> <child>
<object class="GtkViewport"> <object class="GtkViewport" id="openai-whisper-viewport">
<property name="visible">True</property> <property name="visible">True</property>
<property name="can-focus">False</property> <property name="can-focus">False</property>
<child> <child>
@@ -145,7 +146,7 @@
<property name="spacing">24</property> <property name="spacing">24</property>
<child> <child>
<!-- n-columns=3 n-rows=3 --> <!-- n-columns=3 n-rows=3 -->
<object class="GtkGrid" id="whisper_general"> <object class="GtkGrid" id="openaiwhisper_general">
<property name="visible">True</property> <property name="visible">True</property>
<property name="can-focus">False</property> <property name="can-focus">False</property>
<property name="orientation">vertical</property> <property name="orientation">vertical</property>
@@ -219,6 +220,54 @@
<property name="position">1</property> <property name="position">1</property>
</packing> </packing>
</child> </child>
<child>
<object class="GtkScrolledWindow" id="faster-whisper">
<property name="visible">True</property>
<property name="can-focus">True</property>
<property name="hscrollbar-policy">never</property>
<property name="shadow-type">in</property>
<property name="overlay-scrolling">False</property>
<child>
<object class="GtkViewport">
<property name="visible">True</property>
<property name="can-focus">False</property>
<child>
<object class="GtkBox">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="orientation">vertical</property>
<property name="spacing">24</property>
<child>
<!-- n-columns=3 n-rows=3 -->
<object class="GtkGrid" id="fasterwhisper_general">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="orientation">vertical</property>
<property name="row-spacing">12</property>
<child>
<object class="GtkLabel">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="label" translatable="yes">General</property>
<property name="xalign">0</property>
<style>
<class name="bold"/>
</style>
</object>
<packing>
<property name="left-attach">0</property>
<property name="top-attach">0</property>
</packing>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child> <child>
<placeholder/> <placeholder/>
</child> </child>
@@ -234,6 +283,36 @@
<child> <child>
<placeholder/> <placeholder/>
</child> </child>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">0</property>
</packing>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
<child>
<placeholder/>
</child>
</object>
</child>
</object>
</child>
</object>
<packing>
<property name="name">faster-whisper</property>
<property name="title" translatable="yes">Faster Whisper</property>
<property name="position">2</property>
</packing>
</child>
<style> <style>
<class name="settings-stack"/> <class name="settings-stack"/>
</style> </style>

View File

@@ -0,0 +1,60 @@
# This file is part of Gajim.
#
# Gajim is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Gajim is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Gajim. If not, see <http://www.gnu.org/licenses/>.
import logging
import typing
from dataclasses import dataclass
from pathlib import Path
from ..helper import Results
from .model_settings import FasterWhisperSettings
from .model_template import Model
log = logging.getLogger('gajim.p.sttvm_faster_whisper')
try:
import faster_whisper
except ModuleNotFoundError:
if typing.TYPE_CHECKING:
import faster_whisper
@dataclass
class Configuration:
    """Configuration record holding a single ``model_size`` string.

    NOTE(review): nothing in the visible part of this module uses this
    class; ``FasterWhisperModel`` works with ``FasterWhisperSettings``.
    """
    # Model size identifier; no default, so it must be passed explicitly.
    model_size: str
class FasterWhisperModel(Model):
    """Speech-to-text model backed by the ``faster_whisper`` package."""

    def __init__(self):
        self._result: str = ''
        self._config = FasterWhisperSettings()

    @property
    def result(self) -> str:
        """Return the stored result string.

        NOTE(review): nothing in this class assigns ``_result`` after
        ``__init__``; transcriptions are delivered through the ``result``
        argument of :meth:`transcribe`.
        """
        return self._result

    def transcribe(self, result: Results, audio_file: Path) -> None:
        """Transcribe ``audio_file`` and store the text in ``result.text``.

        A ``faster_whisper.WhisperModel`` is created on every call from the
        currently configured ``model_size``. When the configuration's
        ``translate_to_english`` flag is set, the backend is asked to
        translate instead of transcribe.

        :param result: object whose ``text`` attribute receives the output.
        :param audio_file: path of the audio file to process.
        """
        model = faster_whisper.WhisperModel(self._config.model_size,
                                            compute_type="float32")
        log.debug('model size is used = %s', self._config.model_size)

        # Honour the "Translate" switch; previously the setting was stored
        # but never forwarded to the backend.
        if self._config.translate_to_english:
            task = 'translate'
        else:
            task = 'transcribe'

        # The backend declares str / file-like / array inputs, so hand it a
        # plain string path rather than a Path object.
        segments, _info = model.transcribe(str(audio_file), task=task)

        # The backend yields segments lazily; materialise them so they can
        # be logged and joined.
        segments = list(segments)
        log.debug('segments = %s', segments)
        for segment in segments:
            log.debug('[%.2fs -> %.2fs] %s',
                      segment.start, segment.end, segment.text)

        result.text = ''.join(segment.text for segment in segments)

    def set_config(self, config: FasterWhisperSettings) -> None:
        """Replace the active settings with ``config``."""
        self._config = config

View File

@@ -21,3 +21,8 @@ from dataclasses import dataclass, field
class OpenAIWhisperSettings: class OpenAIWhisperSettings:
model_size: str = field(default='tiny', init=True) model_size: str = field(default='tiny', init=True)
translate_to_english: bool = field(default=False, init=True) translate_to_english: bool = field(default=False, init=True)
@dataclass
class FasterWhisperSettings:
    """Settings record for the Faster-Whisper model."""
    # Model size identifier; defaults to 'tiny'.
    model_size: str = 'tiny'
    # Flag requesting translation to English; off by default.
    translate_to_english: bool = False

View File

@@ -15,6 +15,7 @@
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from pathlib import Path from pathlib import Path
from typing import Any
from ..helper import Results from ..helper import Results
@@ -22,5 +23,9 @@ from ..helper import Results
class Model(ABC): class Model(ABC):
@abstractmethod @abstractmethod
def transcribe(self, result: Results, audio_file: Path) -> str: def transcribe(self, result: Results, audio_file: Path) -> None:
return '' pass
@abstractmethod
def set_config(self, config: Any) -> None:
pass

View File

@@ -37,7 +37,6 @@ class Configuration:
class WhisperModel(Model): class WhisperModel(Model):
def __init__(self): def __init__(self):
# TODO
self._result: str = '' self._result: str = ''
self._config = OpenAIWhisperSettings() self._config = OpenAIWhisperSettings()
@@ -45,7 +44,7 @@ class WhisperModel(Model):
def result(self) -> str: def result(self) -> str:
return self._result return self._result
def transcribe(self, result: Results, audio_file: Path) -> str: def transcribe(self, result: Results, audio_file: Path) -> None:
model = whisper.load_model(self._config.model_size) model = whisper.load_model(self._config.model_size)
log.debug('model size is used = %s', self._config.model_size) log.debug('model size is used = %s', self._config.model_size)
result.text = model.transcribe(audio_file)['text'] # pyright: ignore [reportAttributeAccessIssue] result.text = model.transcribe(audio_file)['text'] # pyright: ignore [reportAttributeAccessIssue]

View File

@@ -38,6 +38,11 @@ class STTVoiceMessagesPlugin(GajimPlugin):
OpenAIWhisperSettings( OpenAIWhisperSettings(
model_size='tiny', model_size='tiny',
translate_to_english=False), translate_to_english=False),
''),
'model_faster-whisper': (
FasterWhisperSettings(
model_size='tiny',
translate_to_english=False),
'') '')
} }