use candle::{Result, Tensor};
/// Applies a repetition penalty to the logits of tokens that appear in `context`.
///
/// The logits are converted to `f32` and read out with `to_vec1`, so `logits` is expected
/// to be a one-dimensional tensor. For every distinct token id in `context`, the matching
/// logit is divided by `penalty` when it is non-negative and multiplied by `penalty`
/// otherwise; with `penalty > 1` both branches move the logit downwards. A token id is
/// penalized only once, no matter how many times it occurs in `context`, and ids that
/// fall outside the logits vector are silently ignored.
///
/// The returned tensor is built from the `f32` values on the same device as `logits`.
///
/// # Errors
///
/// Propagates any error from the dtype conversion, from `to_vec1`, or from building the
/// output tensor.
pub fn apply_repeat_penalty(logits: &Tensor, penalty: f32, context: &[u32]) -> Result<Tensor> {
    let device = logits.device();
    let mut logits = logits.to_dtype(candle::DType::F32)?.to_vec1::<f32>()?;
    let mut already_seen = std::collections::HashSet::new();
    for &token_id in context {
        // `insert` returns `false` when the id was already present, so a single hash
        // lookup both records the token and tells us whether it was penalized before.
        if !already_seen.insert(token_id) {
            continue;
        }
        // Ids beyond the end of the logits vector are skipped rather than treated as errors.
        if let Some(logit) = logits.get_mut(token_id as usize) {
            if *logit >= 0. {
                *logit /= penalty
            } else {
                *logit *= penalty
            }
        }
    }
    let logits_len = logits.len();
    Tensor::from_vec(logits, logits_len, device)
}
/// Repeats the heads of a key/value tensor `n_rep` times.
///
/// When `n_rep` is 1 the input is handed back untouched (no rank check is performed in
/// that case). Otherwise `xs` must be rank 4, destructured here as
/// `(b_sz, n_kv_head, seq_len, head_dim)`; `n_rep` references to it are concatenated
/// along axis 2 and the result is reshaped to
/// `(b_sz, n_kv_head * n_rep, seq_len, head_dim)`, which leaves the copies of each
/// original head adjacent along axis 1.
///
/// # Errors
///
/// Propagates any error from `dims4`, `Tensor::cat`, or `reshape`.
pub fn repeat_kv(xs: Tensor, n_rep: usize) -> Result<Tensor> {
    // Nothing to duplicate: give the caller back the tensor it passed in.
    if n_rep == 1 {
        return Ok(xs);
    }

    let (b_sz, n_kv_head, seq_len, head_dim) = xs.dims4()?;
    let copies = vec![&xs; n_rep];
    let concatenated = Tensor::cat(&copies, 2)?;
    let target_shape = (b_sz, n_kv_head * n_rep, seq_len, head_dim);
    concatenated.reshape(target_shape)
}