use crate::tokenizer::{NormalizedString, Normalizer, Result};
use serde::{Deserialize, Serialize};
/// Normalizer that holds a fixed string to be inserted in front of the text it normalizes.
///
/// (De)serialization goes through serde, using a `type` tag as configured by the
/// `#[serde(tag = "type")]` attribute below.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(tag = "type")]
pub struct Prepend {
/// The string placed at the start of any non-empty input during normalization.
pub prepend: String,
}
impl Prepend {
pub fn new(prepend: String) -> Self {
Self { prepend }
}
}
impl Normalizer for Prepend {
    /// Inserts the configured prefix at the start of `normalized`, in place.
    ///
    /// An empty `normalized` is left untouched. This implementation always
    /// returns `Ok(())`.
    fn normalize(&self, normalized: &mut NormalizedString) -> Result<()> {
        // Nothing to prefix: an empty string must stay empty.
        if normalized.is_empty() {
            return Ok(());
        }

        normalized.prepend(&self.prepend);
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Prepending "▁" to "Hello" must yield "▁Hello" and keep the alignment
    /// bookkeeping between the original and the normalized text consistent.
    #[test]
    fn test_prepend() {
        let source = "Hello";
        let expected = "▁Hello";
        // Sanity check: the fixture really describes a change.
        assert_ne!(source, expected);

        let mut text = NormalizedString::from(source);
        let normalizer = Prepend::new(String::from("▁"));
        normalizer.normalize(&mut text).unwrap();

        // The normalized content carries the prefix.
        assert_eq!(text.get(), expected);

        // Expected alignments: eight entries for the eight UTF-8 bytes of
        // "▁Hello" ("▁" is three bytes). The first four all point at (0, 1),
        // i.e. the inserted prefix is attached to the first original character
        // (presumably one entry per normalized byte; confirm against
        // `NormalizedString`).
        let expected_alignments = vec![
            (0, 1),
            (0, 1),
            (0, 1),
            (0, 1),
            (1, 2),
            (2, 3),
            (3, 4),
            (4, 5),
        ];
        let expected_state = NormalizedString::new(
            source.to_string(),
            expected.to_string(),
            expected_alignments,
            0,
        );
        assert_eq!(text, expected_state);

        // Seen from the original side, the first character now spans the
        // prefix plus itself; the rest are shifted by the prefix length.
        assert_eq!(
            text.alignments_original(),
            vec![(0, 4), (4, 5), (5, 6), (6, 7), (7, 8)]
        );
    }
}