Initial commit
This commit is contained in:
98
README.md
Normal file
98
README.md
Normal file
@@ -0,0 +1,98 @@
|
||||
# GigaAM ASR for ONNX
|
||||
## Project purpose:
|
||||
Use the latest (v3) versions of GigaAM ASR without additional dependencies, with locally stored models
|
||||
|
||||
## Project setup:
|
||||
1. Set up original GigaAM project
|
||||
```bash
|
||||
# Clone original GigaAM repo:
|
||||
git clone https://github.com/salute-developers/GigaAM
|
||||
cd GigaAM
|
||||
|
||||
# Create temp venv:
|
||||
python -m venv ./tmp_venv
|
||||
source ./tmp_venv/bin/activate
|
||||
|
||||
# Install project:
|
||||
pip install -e .
|
||||
```
|
||||
2. Acquire chosen models:
|
||||
```python
|
||||
import gigaam
|
||||
onnx_dir = '/target/onnx/model/paths'
|
||||
model_version = 'v3_ctc' # Options: v3_* models
|
||||
|
||||
model = gigaam.load_model(model_version)
|
||||
model.to_onnx(dir_path=onnx_dir)
|
||||
```
|
||||
3. Fetch tokenizer SentencePieceProcessor model from cache
|
||||
- From `~/.cache/gigaam/{model_name}_tokenizer.model`
|
||||
- From `https://cdn.chatwm.opensmodel.sberdevices.ru/GigaAM/{model_name}_tokenizer.model`
|
||||
4. Then you may remove original project:
|
||||
```bash
|
||||
cd ..
|
||||
rm -r ./GigaAM
|
||||
```
|
||||
5. Install this (gigaam-onnx) project
|
||||
6. Set up onnx runtime and load chosen model:
|
||||
```python
|
||||
import onnxruntime as ort
|
||||
from gigaam_onnx import GigaAMV3E2ERNNT, GigaAMV3RNNT, GigaAMV3E2ECTC, GigaAMV3CTC
|
||||
import numpy as np
|
||||
|
||||
# Set up ONNX runtime
|
||||
if 'CUDAExecutionProvider' in ort.get_available_providers():
|
||||
provider = 'CUDAExecutionProvider'
|
||||
else:
|
||||
    provider = 'CPUExecutionProvider'
|
||||
opts = ort.SessionOptions()
|
||||
opts.intra_op_num_threads = 16
|
||||
opts.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
|
||||
opts.log_severity_level = 3
|
||||
|
||||
e2e_rnnt_model = GigaAMV3E2ERNNT(
|
||||
'/path/to/onnx/files/v3_e2e_rnnt_decoder.onnx',
|
||||
'/path/to/onnx/files/v3_e2e_rnnt_encoder.onnx',
|
||||
'/path/to/onnx/files/v3_e2e_rnnt_joint.onnx',
|
||||
'/path/to/onnx/files/v3_e2e_rnnt_tokenizer.model',
|
||||
provider,
|
||||
opts
|
||||
)
|
||||
|
||||
rnnt_model = GigaAMV3RNNT(
|
||||
'/path/to/onnx/files/v3_rnnt_decoder.onnx',
|
||||
'/path/to/onnx/files/v3_rnnt_encoder.onnx',
|
||||
'/path/to/onnx/files/v3_rnnt_joint.onnx',
|
||||
provider,
|
||||
opts
|
||||
)
|
||||
|
||||
e2e_ctc_model = GigaAMV3E2ECTC(
|
||||
'/path/to/onnx/files/v3_e2e_ctc.onnx',
|
||||
'/path/to/onnx/files/v3_e2e_ctc_tokenizer.model',
|
||||
provider,
|
||||
opts
|
||||
)
|
||||
|
||||
ctc_model = GigaAMV3CTC(
|
||||
'/path/to/onnx/files/v3_ctc.onnx',
|
||||
provider,
|
||||
opts
|
||||
)
|
||||
|
||||
# Load wav 16kHz mono PCM
|
||||
wav_data = ...
|
||||
audio_array = np.array(wav_data)
|
||||
|
||||
# Single fragment transcribing with per-char timings
|
||||
text, timings = ctc_model.transcribe(audio_array)
|
||||
|
||||
# Batch transcribing with per-char timings
|
||||
text, timings = e2e_ctc_model.transcribe_batch([audio_array])[0]
|
||||
|
||||
# Batch joined transcribing - joins fragments according to the provided chunk lengths and returns continuous text with per-char timings
|
||||
text, timings = e2e_rnnt_model.transcribe_batch(
|
||||
    [audio_array],  # audio chunks
|
||||
[1] # length of chunks to combine
|
||||
)[0]
|
||||
```
|
||||
Reference in New Issue
Block a user