Struct tokenizers::tokenizer::Encoding

source ·
pub struct Encoding { /* private fields */ }
Expand description

Represents the output of a Tokenizer.

Implementations§

source§

impl Encoding

source

pub fn new( ids: Vec<u32>, type_ids: Vec<u32>, tokens: Vec<String>, words: Vec<Option<u32>>, offsets: Vec<Offsets>, special_tokens_mask: Vec<u32>, attention_mask: Vec<u32>, overflowing: Vec<Self>, sequence_ranges: HashMap<usize, Range<usize>> ) -> Self

source

pub fn with_capacity(len: usize) -> Self

source

pub fn from_tokens(tokens: Vec<Token>, type_id: u32) -> Self

source

pub fn is_empty(&self) -> bool

Whether this Encoding is empty

source

pub fn len(&self) -> usize

Return the total length of this Encoding

source

pub fn n_sequences(&self) -> usize

Return the number of sequences combined in this Encoding

source

pub fn set_sequence_id(&mut self, sequence_id: usize)

Set the given sequence id for the whole range of tokens contained in this Encoding

source

pub fn get_tokens(&self) -> &[String]

source

pub fn get_word_ids(&self) -> &[Option<u32>]

source

pub fn get_word_ids_mut(&mut self) -> &mut [Option<u32>]

source

pub fn get_sequence_ids(&self) -> Vec<Option<usize>>

source

pub fn get_ids(&self) -> &[u32]

source

pub fn get_type_ids(&self) -> &[u32]

source

pub fn set_type_ids(&mut self, type_ids: Vec<u32>)

source

pub fn get_offsets(&self) -> &[Offsets]

source

pub fn get_offsets_mut(&mut self) -> &mut [Offsets]

source

pub fn get_special_tokens_mask(&self) -> &[u32]

source

pub fn get_attention_mask(&self) -> &[u32]

source

pub fn get_overflowing(&self) -> &Vec<Encoding>

source

pub fn set_overflowing(&mut self, overflowing: Vec<Encoding>)

source

pub fn get_overflowing_mut(&mut self) -> &mut Vec<Encoding>

source

pub fn take_overflowing(&mut self) -> Vec<Encoding>

source

pub fn token_to_sequence(&self, token: usize) -> Option<usize>

Returns the index of the sequence containing the given token

source

pub fn word_to_tokens( &self, word: u32, sequence_id: usize ) -> Option<(usize, usize)>

Get the range of encoded tokens corresponding to the word at the given index in the input sequence, in the form (start_token, end_token + 1), i.e. the end index is exclusive.

source

pub fn word_to_chars(&self, word: u32, sequence_id: usize) -> Option<Offsets>

Get the offsets of the word at the given index in the input sequence.

source

pub fn token_to_chars(&self, token: usize) -> Option<(usize, Offsets)>

Get the offsets of the token at the given index.

source

pub fn token_to_word(&self, token: usize) -> Option<(usize, u32)>

Get the word that contains the token at the given index.

source

pub fn char_to_token(&self, pos: usize, sequence_id: usize) -> Option<usize>

Get the token that contains the given char.

source

pub fn char_to_word(&self, pos: usize, sequence_id: usize) -> Option<u32>

Get the word that contains the given char.

source

pub fn truncate( &mut self, max_len: usize, stride: usize, direction: TruncationDirection )

Truncate the current Encoding.

Panics if stride >= max_len

source

pub fn merge<I: IntoIterator<Item = Encoding>>( encodings: I, growing_offsets: bool ) -> Self

Merge all Encodings together

source

pub fn merge_with(&mut self, pair: Encoding, growing_offsets: bool)

Merge the given Encoding into this one. Happens in place.

source

pub fn pad( &mut self, target_length: usize, pad_id: u32, pad_type_id: u32, pad_token: &str, direction: PaddingDirection )

Trait Implementations§

source§

impl Clone for Encoding

source§

fn clone(&self) -> Encoding

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Debug for Encoding

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
source§

impl Default for Encoding

source§

fn default() -> Encoding

Returns the “default value” for a type. Read more
source§

impl<'de> Deserialize<'de> for Encoding

source§

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer. Read more
source§

impl FromIterator<(u32, String, (usize, usize), Option<u32>, u32)> for Encoding

source§

fn from_iter<I: IntoIterator<Item = (u32, String, (usize, usize), Option<u32>, u32)>>( iter: I ) -> Self

Creates a value from an iterator. Read more
source§

impl FromIterator<Encoding> for Encoding

source§

fn from_iter<I: IntoIterator<Item = Encoding>>(iter: I) -> Self

Creates a value from an iterator. Read more
source§

impl PartialEq for Encoding

source§

fn eq(&self, other: &Encoding) -> bool

This method tests for self and other values to be equal, and is used by ==.
1.0.0 · source§

fn ne(&self, other: &Rhs) -> bool

This method tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
source§

impl Serialize for Encoding

source§

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error>
where __S: Serializer,

Serialize this value into the given Serde serializer. Read more
source§

impl StructuralPartialEq for Encoding

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> Pointable for T

source§

const ALIGN: usize = _

The alignment of the pointer.
§

type Init = T

The type for initializers.
source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more
source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
source§

impl<T> ToOwned for T
where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

source§

fn vzip(self) -> V

source§

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,