Model reference
audiotext.models
FAST_TRANSCRIPTION_DEFAULTS
module-attribute
FAST_TRANSCRIPTION_DEFAULTS = {
"beam_size": 1,
"best_of": 1,
"vad_filter": True,
"temperature": 0.0,
"condition_on_previous_text": False,
"without_timestamps": False,
"word_timestamps": False,
"vad_min_silence_duration_ms": 2000,
"vad_speech_pad_ms": 400,
"no_speech_threshold": 0.6,
}
ModelPreset
dataclass
A ready-to-load transcription model configuration.
Source code in src/audiotext/models.py
default_options
class-attribute
instance-attribute
default_options = field(
default_factory=lambda: dict(FAST_TRANSCRIPTION_DEFAULTS)
)
capabilities
class-attribute
instance-attribute
capabilities = field(
default_factory=lambda: {
"transcription": True,
"translation": False,
"language_detection": True,
"timestamps": True,
"word_timestamps": True,
"streaming": False,
"diarization": False,
"punctuation": True,
"capitalization": True,
}
)
__init__
__init__(
name,
backend,
model,
device,
compute_type,
size,
description,
release_date,
model_url,
parameters,
ram,
languages=("en", "es", "ca", "auto"),
default_options=(
lambda: dict(FAST_TRANSCRIPTION_DEFAULTS)
)(),
capabilities=(
lambda: {
"transcription": True,
"translation": False,
"language_detection": True,
"timestamps": True,
"word_timestamps": True,
"streaming": False,
"diarization": False,
"punctuation": True,
"capitalization": True,
}
)(),
)