Initial commit

This commit is contained in:
2025-12-03 16:44:29 +03:00
commit 4f7f22e95f
15 changed files with 1693 additions and 0 deletions

66
src/gigaam_onnx/v3_ctc.py Normal file
View File

@@ -0,0 +1,66 @@
import numpy as np
from .preprocess import FeatureExtractor, load_audio
import onnxruntime as rt
from .decoding import CTCGreedyDecoding, Tokenizer
from .ctc import CTCASR
_CTC_VOCAB = [
' ',
'а',
'б',
'в',
'г',
'д',
'е',
'ж',
'з',
'и',
'й',
'к',
'л',
'м',
'н',
'о',
'п',
'р',
'с',
'т',
'у',
'ф',
'х',
'ц',
'ч',
'ш',
'щ',
'ъ',
'ы',
'ь',
'э',
'ю',
'я',
]
class GigaAMV3CTC(CTCASR):
preprocessor: FeatureExtractor
model_path: str
decoding: CTCGreedyDecoding
def __init__(self, model_path: str, provider: str, opts: rt.SessionOptions):
self.model_path = model_path
preprocessor = FeatureExtractor(
sample_rate=16000,
features=64,
win_length=320,
hop_length=160,
mel_scale='htk',
n_fft=320,
mel_norm=None,
center=False
)
tokenizer = Tokenizer(_CTC_VOCAB)
encoder = rt.InferenceSession(self.model_path, providers=[provider], sess_options=opts)
super().__init__(preprocessor, tokenizer, encoder)