pub mod audio;
pub mod model;
pub mod quantized_model;
use serde::Deserialize;
/// Model hyper-parameters, deserializable through serde.
///
/// Every field except [`Config::suppress_tokens`] has no serde default, so it
/// must be present in the deserialized input.
#[derive(Debug, Clone, PartialEq, Deserialize)]
pub struct Config {
    /// Number of mel bins.
    // NOTE(review): presumably the mel-spectrogram feature dimension fed to the encoder — confirm.
    pub num_mel_bins: usize,
    /// Maximum number of source positions.
    // NOTE(review): presumably the encoder-side sequence length limit — confirm.
    pub max_source_positions: usize,
    /// Model dimension.
    // NOTE(review): presumably the hidden size shared by encoder and decoder — confirm.
    pub d_model: usize,
    /// Number of attention heads in the encoder.
    pub encoder_attention_heads: usize,
    /// Number of encoder layers.
    pub encoder_layers: usize,
    /// Vocabulary size.
    pub vocab_size: usize,
    /// Maximum number of target positions.
    // NOTE(review): presumably the decoder-side sequence length limit — confirm.
    pub max_target_positions: usize,
    /// Number of attention heads in the decoder.
    pub decoder_attention_heads: usize,
    /// Number of decoder layers.
    pub decoder_layers: usize,
    /// Token ids to suppress; an empty vector is used when the key is absent
    /// from the input (`#[serde(default)]`).
    // NOTE(review): how suppression is applied is decided by the consumers of
    // this config, which are not visible here.
    #[serde(default)]
    pub suppress_tokens: Vec<u32>,
}
/// Element type constant exported by this module: 32-bit float.
// NOTE(review): presumably the dtype used for model tensors — usage is not
// visible in this file; confirm at call sites.
pub const DTYPE: candle::DType = candle::DType::F32;
/// Audio sample rate.
// NOTE(review): presumably expressed in Hz (samples per second) — confirm.
pub const SAMPLE_RATE: usize = 16000;

/// FFT size.
// NOTE(review): presumably the window length in samples used when computing
// the spectrogram — confirm against the audio module.
pub const N_FFT: usize = 400;

/// Hop length; [`N_FRAMES`] is derived by dividing [`N_SAMPLES`] by it.
pub const HOP_LENGTH: usize = 160;

/// Chunk length; multiplied by [`SAMPLE_RATE`] to obtain [`N_SAMPLES`].
// NOTE(review): the product with the sample rate suggests this is in seconds — confirm.
pub const CHUNK_LENGTH: usize = 30;

/// Number of samples in one chunk: `CHUNK_LENGTH * SAMPLE_RATE` (480000).
pub const N_SAMPLES: usize = SAMPLE_RATE * CHUNK_LENGTH;

/// Number of frames in one chunk: `N_SAMPLES / HOP_LENGTH` (3000).
pub const N_FRAMES: usize = N_SAMPLES / HOP_LENGTH;

/// No-speech threshold.
// NOTE(review): same value as the `no_speech_threshold` default of the
// reference Whisper implementation; how it is applied is decided by callers.
pub const NO_SPEECH_THRESHOLD: f64 = 0.6;
/// Log-probability threshold.
// NOTE(review): same value as the `logprob_threshold` default of the reference
// Whisper implementation; how it is applied is decided by callers.
pub const LOGPROB_THRESHOLD: f64 = -1.0;

/// Six temperature values in ascending order, from 0.0 to 1.0.
// NOTE(review): presumably tried in order as decoding fallbacks — confirm at call sites.
pub const TEMPERATURES: [f64; 6] = [
    0.0, //
    0.2, //
    0.4, //
    0.6, //
    0.8, //
    1.0, //
];

/// Compression-ratio threshold.
// NOTE(review): same value as the `compression_ratio_threshold` default of the
// reference Whisper implementation; how it is applied is decided by callers.
pub const COMPRESSION_RATIO_THRESHOLD: f64 = 2.4;
/// Text of the start-of-transcript special token.
pub const SOT_TOKEN: &str = "<|startoftranscript|>";

/// Text of the transcribe special token.
pub const TRANSCRIBE_TOKEN: &str = "<|transcribe|>";

/// Text of the translate special token.
pub const TRANSLATE_TOKEN: &str = "<|translate|>";

/// Text of the no-timestamps special token.
pub const NO_TIMESTAMPS_TOKEN: &str = "<|notimestamps|>";

/// Text of the end-of-text special token.
pub const EOT_TOKEN: &str = "<|endoftext|>";

/// The two spellings of the no-speech special token.
// NOTE(review): presumably different tokenizer versions use different
// spellings, so callers should try both — confirm.
pub const NO_SPEECH_TOKENS: [&str; 2] = [
    "<|nocaptions|>", //
    "<|nospeech|>",   //
];