Expand description
Small wrapper around sentencepiece’s esaxx suffix array C++ library.

Usage:
#[cfg(feature="cpp")]
{
let string = "abracadabra";
let suffix = esaxx_rs::suffix(string).unwrap();
let chars: Vec<_> = string.chars().collect();
let mut iter = suffix.iter();
assert_eq!(iter.next().unwrap(), (&chars[..4], 2)); // abra
assert_eq!(iter.next(), Some((&chars[..1], 5))); // a
assert_eq!(iter.next(), Some((&chars[1..4], 2))); // bra
assert_eq!(iter.next(), Some((&chars[2..4], 2))); // ra
assert_eq!(iter.next(), Some((&chars[..0], 11))); // ''
assert_eq!(iter.next(), None);
}

The previous version (shown above) uses unsafe, optimized C++ code. There is another implementation, written in safe Rust, that is roughly 2x slower. It is slower because it uses usize (mostly 64-bit) instead of i32 (32-bit). However, it does seem to fix a few out-of-bounds (OOB) issues in the C++ version (which never seemed to cause real problems in tests, but still).
let string = "abracadabra";
let suffix = esaxx_rs::suffix_rs(string).unwrap();
let chars: Vec<_> = string.chars().collect();
let mut iter = suffix.iter();
assert_eq!(iter.next().unwrap(), (&chars[..4], 2)); // abra
assert_eq!(iter.next(), Some((&chars[..1], 5))); // a
assert_eq!(iter.next(), Some((&chars[1..4], 2))); // bra
assert_eq!(iter.next(), Some((&chars[2..4], 2))); // ra
assert_eq!(iter.next(), Some((&chars[..0], 11))); // ''
assert_eq!(iter.next(), None);

Structs§
Functions§
- suffix: Creates the suffix array and provides an iterator over its items (unsafe C++ version).
- suffix_rs: Creates the suffix array and provides an iterator over its items (safe Rust version). See suffix.