Struct tokenizers::models::wordpiece::WordPieceTrainer

source ·
pub struct WordPieceTrainer { /* private fields */ }
Expand description

Trains a WordPiece model.

Implementations§

source§

impl WordPieceTrainer

source

pub fn min_frequency(&self) -> u64

source

pub fn set_min_frequency(&mut self, freq: u64)

source

pub fn vocab_size(&self) -> usize

source

pub fn set_vocab_size(&mut self, size: usize)

source

pub fn show_progress(&self) -> bool

source

pub fn set_show_progress(&mut self, show_progress: bool)

source

pub fn special_tokens(&self) -> &[AddedToken]

source

pub fn set_special_tokens(&mut self, special_tokens: Vec<AddedToken>)

source

pub fn limit_alphabet(&self) -> Option<usize>

source

pub fn set_limit_alphabet(&mut self, limit: Option<usize>)

source

pub fn initial_alphabet(&self) -> &HashSet<char>

source

pub fn set_initial_alphabet(&mut self, alphabet: HashSet<char>)

source

pub fn continuing_subword_prefix(&self) -> &Option<String>

source

pub fn set_continuing_subword_prefix(&mut self, prefix: Option<String>)

source

pub fn end_of_word_suffix(&self) -> &Option<String>

source

pub fn set_end_of_word_suffix(&mut self, suffix: Option<String>)

source

pub fn builder() -> WordPieceTrainerBuilder

source

pub fn train(&self, model: &mut WordPiece) -> Result<Vec<AddedToken>>

Trait Implementations§

source§

impl Clone for WordPieceTrainer

source§

fn clone(&self) -> WordPieceTrainer

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Default for WordPieceTrainer

source§

fn default() -> WordPieceTrainer

Returns the “default value” for a type. Read more
source§

impl<'de> Deserialize<'de> for WordPieceTrainer

source§

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer. Read more
source§

impl From<WordPieceTrainer> for TrainerWrapper

source§

fn from(from: WordPieceTrainer) -> Self

Converts to this type from the input type.
source§

impl Serialize for WordPieceTrainer

source§

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error>
where __S: Serializer,

Serialize this value into the given Serde serializer. Read more
source§

impl Trainer for WordPieceTrainer

§

type Model = WordPiece

source§

fn train(&self, model: &mut WordPiece) -> Result<Vec<AddedToken>>

The actual training method. This trains the given model in place (it is passed by mutable reference) and returns a list of special_tokens to be added directly to the tokenizer along with the model.
source§

fn should_show_progress(&self) -> bool

Whether we should show progress during the training.
source§

fn feed<I, S, F>(&mut self, iterator: I, process: F) -> Result<()>
where I: Iterator<Item = S> + Send, S: AsRef<str> + Send, F: Fn(&str) -> Result<Vec<String>> + Sync,

Processes an iterator of sequences, calling process on each of them to pre-process that sequence as relevant.

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> Pointable for T

source§

const ALIGN: usize = _

The alignment of the pointer.
§

type Init = T

The type for initializers.
source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes an object with the given initializer. Read more
source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
source§

impl<T> ToOwned for T
where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

source§

fn vzip(self) -> V

source§

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,