Struct tokenizers::tokenizer::Encoding

impl Encoding

pub fn new( ids: Vec<u32>, type_ids: Vec<u32>, tokens: Vec<String>, words: Vec<Option<u32>>, offsets: Vec<Offsets>, special_tokens_mask: Vec<u32>, attention_mask: Vec<u32>, overflowing: Vec<Self>, sequence_ranges: HashMap<usize, Range<usize>> ) -> Self

pub fn with_capacity(len: usize) -> Self

pub fn from_tokens(tokens: Vec<Token>, type_id: u32) -> Self

pub fn is_empty(&self) -> bool

Whether this Encoding is empty

pub fn len(&self) -> usize

Return the total length of this Encoding

pub fn n_sequences(&self) -> usize

Return the number of sequences combined in this Encoding

pub fn set_sequence_id(&mut self, sequence_id: usize)

Set the given sequence id for the whole range of tokens contained in this Encoding

pub fn token_to_sequence(&self, token: usize) -> Option<usize>

Returns the index of the sequence containing the given token

pub fn word_to_tokens( &self, word: u32, sequence_id: usize ) -> Option<(usize, usize)>

Get the encoded tokens corresponding to the word at the given index in the input sequence, with the form (start_token, end_token + 1)

pub fn word_to_chars(&self, word: u32, sequence_id: usize) -> Option<Offsets>

Get the offsets of the word at the given index in the input sequence.

pub fn token_to_chars(&self, token: usize) -> Option<(usize, Offsets)>

Get the offsets of the token at the given index.

pub fn token_to_word(&self, token: usize) -> Option<(usize, u32)>

Get the word that contains the token at the given index.

pub fn char_to_token(&self, pos: usize, sequence_id: usize) -> Option<usize>

Get the token that contains the given char.

pub fn char_to_word(&self, pos: usize, sequence_id: usize) -> Option<u32>

Get the word that contains the given char.

pub fn truncate( &mut self, max_len: usize, stride: usize, direction: TruncationDirection )

Truncate the current Encoding.

Panics if stride >= max_len

pub fn merge<I: IntoIterator<Item = Encoding>>( encodings: I, growing_offsets: bool ) -> Self

Merge all Encodings together

pub fn merge_with(&mut self, pair: Encoding, growing_offsets: bool)

Merge this Encoding with the given Encoding. Happens in place.

pub fn pad( &mut self, target_length: usize, pad_id: u32, pad_type_id: u32, pad_token: &str, direction: PaddingDirection )

Trait Implementations§

impl Clone for Encoding

fn clone(&self) -> Encoding

Returns a copy of the value. Read more

1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

impl Debug for Encoding

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

impl Default for Encoding

fn default() -> Encoding

Returns the “default value” for a type. Read more

impl<'de> Deserialize<'de> for Encoding

fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer. Read more

impl FromIterator<(u32, String, (usize, usize), Option<u32>, u32)> for Encoding

fn from_iter<I: IntoIterator<Item = (u32, String, (usize, usize), Option<u32>, u32)>>( iter: I ) -> Self

Creates a value from an iterator. Read more

impl FromIterator<Encoding> for Encoding

fn from_iter<I: IntoIterator<Item = Encoding>>(iter: I) -> Self

Creates a value from an iterator. Read more

impl PartialEq for Encoding

fn eq(&self, other: &Encoding) -> bool

This method tests for self and other values to be equal, and is used by ==.

1.0.0 · source§

fn ne(&self, other: &Rhs) -> bool

This method tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.

impl Serialize for Encoding

fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,

Serialize this value into the given Serde serializer. Read more

impl StructuralPartialEq for Encoding

Auto Trait Implementations§

impl Freeze for Encoding

impl RefUnwindSafe for Encoding

impl Send for Encoding

impl Sync for Encoding

impl Unpin for Encoding

impl UnwindSafe for Encoding

Blanket Implementations§

impl<T> Any for T
where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

impl<T> Borrow<T> for T
where T: ?Sized,

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

impl<T> BorrowMut<T> for T
where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

impl<T> From<T> for T

fn from(t: T) -> T

Returns the argument unchanged.

impl<T, U> Into<U> for T
where U: From<T>,

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

impl<T> Pointable for T

const ALIGN: usize = _

The alignment of pointer.

type Init = T

The type for initializers.

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more

impl<T> ToOwned for T
where T: Clone,

type Owned = T

The resulting type after obtaining ownership.

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more

impl<T, U> TryFrom<U> for T
where U: Into<T>,

type Error = Infallible

The type returned in the event of a conversion error.

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn vzip(self) -> V