Initial commit

2025-12-03 16:44:29 +03:00
commit 4f7f22e95f
15 changed files with 1693 additions and 0 deletions
--- a/src/gigaam_onnx/v3_ctc.py
+++ b/src/gigaam_onnx/v3_ctc.py
@@ -0,0 +1,66 @@
+import numpy as np
+
+from .preprocess import FeatureExtractor, load_audio
+import onnxruntime as rt
+
+from .decoding import CTCGreedyDecoding, Tokenizer
+from .ctc import CTCASR
+
+_CTC_VOCAB = [  # passed to Tokenizer in GigaAMV3CTC.__init__; order presumably = model output ids (TODO confirm)
+    ' ',  # the only non-letter entry
+    'а',
+    'б',
+    'в',
+    'г',
+    'д',
+    'е',  # note: 'ё' has no entry of its own in this list
+    'ж',
+    'з',
+    'и',
+    'й',
+    'к',
+    'л',
+    'м',
+    'н',
+    'о',
+    'п',
+    'р',
+    'с',
+    'т',
+    'у',
+    'ф',
+    'х',
+    'ц',
+    'ч',
+    'ш',
+    'щ',
+    'ъ',
+    'ы',
+    'ь',
+    'э',
+    'ю',
+    'я',
+]
+
+
+class GigaAMV3CTC(CTCASR):
+    """GigaAM v3 CTC model: feature extractor + tokenizer + ONNX session on top of CTCASR."""
+    preprocessor: FeatureExtractor
+    model_path: str
+    decoding: CTCGreedyDecoding
+
+    def __init__(self, model_path: str, provider: str, opts: rt.SessionOptions):
+        """Load the ONNX file at ``model_path`` on ``provider``, configured by ``opts``."""
+        self.model_path = model_path
+        # Fixed front-end settings: sample rate 16000, 64 features, window/FFT 320, hop 160.
+        frontend = FeatureExtractor(
+            sample_rate=16000, features=64,
+            win_length=320, hop_length=160, n_fft=320,
+            mel_scale='htk', mel_norm=None, center=False,
+        )
+        alphabet = Tokenizer(_CTC_VOCAB)
+        session = rt.InferenceSession(
+            self.model_path, providers=[provider], sess_options=opts,
+        )
+        # Base-class arguments, in order: feature extractor, tokenizer, ONNX session.
+        super().__init__(frontend, alphabet, session)